xref: /openbsd-src/gnu/usr.bin/perl/Porting/bench.pl (revision e068048151d29f2562a32185e21a8ba885482260)
1#!/usr/bin/perl
2#
3# A tool for analysing the performance of the code snippets found in
4# t/perf/benchmarks or similar
5
6
7=head1 NAME
8
9bench.pl - Compare the performance of perl code snippets across multiple
10perls.
11
12=head1 SYNOPSIS
13
14    # Basic: run the tests in t/perf/benchmarks against two or
15    # more perls
16
17    bench.pl [options] perlA[=labelA] perlB[=labelB] ...
18
19    # run the tests against the same perl twice, with varying options
20
21    bench.pl [options] perlA=bigint --args='-Mbigint' perlA=plain
22
23    # Run bench on blead, saving results to file; then modify the blead
24    # binary, and benchmark again, comparing against the saved results
25
26    bench.pl [options] --write=blead.time ./perl=blead
27    # ... hack hack hack, updating ./perl ...
28    bench.pl --read=blead.time ./perl=hacked
29
30    # You can also combine --read with --write and new benchmark runs
31
32    bench.pl --read=blead.time --write=last.time -- ./perl=hacked
33
34=head1 DESCRIPTION
35
36By default, F<bench.pl> will run code snippets found in
37F<t/perf/benchmarks> (or similar) under cachegrind, in order to calculate
38how many instruction reads, data writes, branches, cache misses, etc. that
39one execution of the snippet uses. Usually it will run them against two or
40more perl executables and show how much each test has gotten better or
41worse.
42
43It is modelled on the F<perlbench> tool, but since it measures instruction
44reads etc., rather than timings, it is much more precise and reproducible.
45It is also considerably faster, and is capable of running tests in
46parallel (with C<-j>). Rather than  displaying a single relative
47percentage per test/perl combination, it displays values for 13 different
48measurements, such as instruction reads, conditional branch misses etc.
49
50There are options to write the raw data to a file, and to read it back.
51This means that you can view the same run data in different views with
52different selection and sort options. You can also use this mechanism
53to save the results of timing one perl, and then read it back while timing
a modification, so that you don't have to rerun the same tests on the same
55perl over and over, or have two perl executables built at the same time.
56
57The optional C<=label> after each perl executable is used in the display
58output. If you are doing a two step benchmark then you should provide
59a label for at least the "base" perl. If a label isn't specified, it
60defaults to the name of the perl executable. Labels must be unique across
61all current executables, plus any previous ones obtained via --read.
62
63In its most general form, the specification of a perl executable is:
64
65    path/perl=+mylabel --args='-foo -bar' --args='-baz' \
66                       --env='A=a' --env='B=b'
67
68This defines how to run the executable F<path/perl>. It has a label,
69which due to the C<+>, is appended to the binary name to give a label of
70C<path/perl=+mylabel> (without the C<+>, the label would be just
71C<mylabel>).
72
73It can be optionally followed by one or more C<--args> or C<--env>
74switches, which specify extra command line arguments or environment
75variables to use when invoking that executable. Each C<--env> switch
76should be of the form C<--env=VARIABLE=value>. Any C<--arg> values are
77concatenated to the eventual command line, along with the global
78C<--perlargs> value if any. The above would cause a system() call looking
79something like:
80
81    PERL_HASH_SEED=0 A=a B=b valgrind --tool=cachegrind \
82        path/perl -foo -bar -baz ....
83
84=head1 OPTIONS
85
86=head2 General options
87
88=over 4
89
90=item *
91
92--action=I<foo>
93
94What action to perform. The default is  I<grind>, which runs the benchmarks
95using I<cachegrind> as the back end. The only other action at the moment is
96I<selftest>, which runs some basic sanity checks and produces TAP output.
97
98=item *
99
100--debug
101
102Enable debugging output.
103
104=item *
105
--help
107
108Display basic usage information.
109
110=item *
111
112-v
113--verbose
114
115Display progress information.
116
117=back
118
119=head2 Test selection options
120
121=over 4
122
123=item *
124
125--tests=I<FOO>
126
127Specify a subset of tests to run (or in the case of C<--read>, to read).
128It may be either a comma-separated list of test names, or a regular
129expression. For example
130
131    --tests=expr::assign::scalar_lex,expr::assign::2list_lex
132    --tests=/^expr::/
133
134
135=back
136
137=head2 Input options
138
139=over 4
140
141
142=item *
143
144-r I<file>
145--read=I<file>
146
147Read in saved data from a previous C<--write> run from the specified file.
148If C<--tests> is present too, then only tests matching those conditions
149are read from the file.
150
151C<--read> may be specified multiple times, in which case the results
152across all files are aggregated. The list of test names from each file
153(after filtering by C<--tests>) must be identical across all files.
154
155This list of tests is used instead of that obtained from the normal
156benchmark file (or C<--benchfile>) for any benchmarks that are run.
157
158The perl labels must be unique across all read in test results.
159
160Requires C<JSON::PP> to be available.
161
162=back
163
164=head2 Benchmarking options
165
166Benchmarks will be run for all perls specified on the command line.
167These options can be used to modify the benchmarking behavior:
168
169=over 4
170
171=item *
172
173--autolabel
174
175Generate a unique label for every executable which doesn't have an
176explicit C<=label>. Works by stripping out common prefixes and suffixes
177from the executable names, then for any non-unique names, appending
C<-0>, C<-1>, etc. Text directly surrounding the unique part which looks
like a version number (i.e. which matches C</[0-9\.]+/>) isn't stripped.
180For example,
181
182    perl-5.20.0-threaded  perl-5.22.0-threaded  perl-5.24.0-threaded
183
184stripped to unique parts would be:
185
186    20  22  24
187
188but is actually only stripped down to:
189
190    5.20.0  5.22.0  5.24.0
191
192If the final results are plain integers, they are prefixed with "p"
193to avoid looking like column numbers to switches like C<--norm=2>.
194
195
196=item *
197
198--benchfile=I<foo>
199
200The path of the file which contains the benchmarks (F<t/perf/benchmarks>
201by default).
202
203=item *
204
205--grindargs=I<foo>
206
207Optional command-line arguments to pass to all cachegrind invocations.
208
209=item *
210
211-j I<N>
212--jobs=I<N>
213
214Run I<N> jobs in parallel (default 1). This determines how many cachegrind
215process will run at a time, and should generally be set to the number
216of CPUs available.
217
218=item *
219
220--perlargs=I<foo>
221
222Optional command-line arguments to pass to every perl executable.  This
may optionally be combined with C<--args> switches following individual
224perls. For example:
225
226    bench.pl --perlargs='-Ilib -It/lib' .... \
227        perlA --args='-Mstrict' \
228        perlB --args='-Mwarnings'
229
230would cause the invocations
231
232    perlA -Ilib -It/lib -Mstrict
233    perlB -Ilib -It/lib -Mwarnings
234
235=back
236
237=head2 Output options
238
239Any results accumulated via --read or by running benchmarks can be output
240in any or all of these three ways:
241
242=over 4
243
244=item *
245
246-w I<file>
247--write=I<file>
248
249Save the raw data to the specified file. It can be read back later with
250C<--read>. If combined with C<--read> then the output file will be
251the merge of the file read and any additional perls added on the command
252line.
253
254Requires C<JSON::PP> to be available.
255
256=item *
257
258--bisect=I<field,minval,maxval>
259
260Exit with a zero status if the named field is in the specified range;
261exit with 1 otherwise. It will complain if more than one test or perl has
262been specified. It is intended to be called as part of a bisect run, to
263determine when something changed.  For example,
264
265    bench.pl -j 8 --tests=foo --bisect=Ir,100,105 --perlargs=-Ilib \
266        ./miniperl
267
268might be called from bisect to find when the number of instruction reads
269for test I<foo> falls outside the range 100..105.
270
271=item *
272
273--show
274
275Display the results to stdout in human-readable form.  This is enabled by
276default, except with --write and --bisect. The following sub-options alter
277how --show behaves.
278
279=over 4
280
281=item *
282
283--average
284
285Only display the overall average, rather than the results for each
286individual test.
287
288=item *
289
290--compact=I<perl>
291
292Display the results for a single perl executable in a compact form.
293Which perl to display is specified in the same manner as C<--norm>.
294
295=item *
296
297--fields=I<a,b,c>
298
299Display only the specified fields; for example,
300
301    --fields=Ir,Ir_m,Ir_mm
302
303If only one field is selected, the output is in more compact form.
304
305=item *
306
307--norm=I<foo>
308
309Specify which perl column in the output to treat as the 100% norm.
310It may be:
311
312=over
313
314* a column number (0..N-1),
315
316* a negative column number (-1..-N) which counts from the right (so -1 is
317the right-most column),
318
319* or a perl executable name,
320
321* or a perl executable label.
322
323=back
324
325It defaults to the leftmost column.
326
327=item *
328
329--raw
330
331Display raw data counts rather than percentages in the outputs. This
allows you to see the exact number of instruction reads, branch misses etc.
for each test/perl combination. It also causes the C<AVERAGE> display
per field to be calculated based on the average of each test's count
335rather than average of each percentage. This means that tests with very
336high counts will dominate.
337
338=item *
339
340--sort=I<field:perl>
341
342Order the tests in the output based on the value of I<field> in the
343column I<perl>. The I<perl> value is as per C<--norm>. For example
344
345    bench.pl --sort=Dw:perl-5.20.0 \
346        perl-5.16.0 perl-5.18.0 perl-5.20.0
347
348=back
349
350=back
351
352=cut
353
354
355
356use 5.010000;
357use warnings;
358use strict;
359use Getopt::Long qw(:config no_auto_abbrev require_order);
360use IPC::Open2 ();
361use IO::Select;
362use IO::File;
363use POSIX ":sys_wait_h";
364
# The version of the file format used to save data with --write and read
# back with --read. We refuse to process a read-in file whose integer
# version component is newer than ours.

my $FORMAT_VERSION = 1.0;

# The cachegrind fields we know about: instruction reads (Ir), data reads
# and writes (Dr/Dw) with their L1 (_m1) and last-level (_mm) cache
# misses, plus conditional (COND) and indirect (IND) branches and their
# mispredicts (_m). See parse_cachegrind() for how they are extracted.

my %VALID_FIELDS = map { $_ => 1 }
    qw(Ir Ir_m1 Ir_mm Dr Dr_m1 Dr_mm Dw Dw_m1 Dw_mm COND COND_m IND IND_m);
374
# Print the command-line help text and exit non-zero (via die).
# Fixes a typo in the user-visible text: "environmenbt" -> "environment".

sub usage {
    die <<EOF;
Usage: $0 [options] -- perl[=label] ...

General options:

  --action=foo       What action to perform [default: grind]:
                        grind      run the code under cachegrind
                        selftest   perform a selftest; produce TAP output
  --debug            Enable verbose debugging output.
  --help             Display this help.
  -v|--verbose       Display progress information.


Selection:

  --tests=FOO        Select only the specified tests for reading, benchmarking
                       and display.  FOO may be either a list of tests or
                       a pattern: 'foo,bar,baz' or '/regex/';
                       [default: all tests].

Input:

  -r|--read=file     Read in previously saved data from the specified file.
                        May be repeated, and be used together with new
                        benchmarking to create combined results.

Benchmarking:
  Benchmarks will be run for any perl specified on the command line.
  These options can be used to modify the benchmarking behavior:

  --autolabel        generate labels for any executables without one
  --benchfile=foo    File containing the benchmarks.
                         [default: t/perf/benchmarks].
  --grindargs=foo    Optional command-line args to pass to cachegrind.
  -j|--jobs=N        Run N jobs in parallel [default 1].
  --perlargs=foo     Optional command-line args to pass to each perl to run.

Output:
  Any results accumulated via --read or running benchmarks can be output
  in any or all of these three ways:

  -w|--write=file    Save the raw data to the specified file (may be read
                       back later with --read).

  --bisect=f,min,max Exit with a zero status if the named field f is in
                       the specified min..max range; exit 1 otherwise.
                       Produces no other output. Only legal if a single
                       benchmark test has been specified.

  --show             Display the results to stdout in human-readable form.
                       This is enabled by default, except with --write and
                       --bisect. The following sub-options alter how
                       --show behaves.

    --average          Only display average, not individual test results.
    --compact=perl     Display the results of a single perl in compact form.
                       Which perl specified like --norm
    --fields=a,b,c     Display only the specified fields (e.g. Ir,Ir_m,Ir_mm).
    --norm=perl        Which perl column to treat as 100%; may be a column
                         number (0..N-1) or a perl executable name or label;
                         [default: 0].
    --raw              Display raw data counts rather than percentages.
    --sort=field:perl  Sort the tests based on the value of 'field' in the
                       column 'perl'. The perl value is as per --norm.


The command line ends with one or more specified perl executables,
which will be searched for in the current \$PATH. Each binary name may
have an optional =LABEL appended, which will be used rather than the
executable name in output. The labels must be unique across all current
executables and previous runs obtained via --read. Each executable may
optionally be succeeded by --args= and --env= to specify per-executable
arguments and environment variables:

    perl-5.24.0=strict --args='-Mwarnings -Mstrict' --env='FOO=foo' \
    perl-5.24.0=plain
EOF
}
454
# Global option settings, filled in from the command line by GetOptions()
# below. Keys without a corresponding switch ('sort-field', 'sort-perl')
# are derived during argument validation.
my %OPTS = (
    action    => 'grind',   # 'grind' (run under cachegrind) or 'selftest'
    average   => 0,         # --average: show only per-field averages
    benchfile => undef,     # --benchfile: path of the benchmarks file
    bisect    => undef,     # --bisect=field,min,max: range-check mode
    compact   => undef,     # --compact=perl: compact single-perl display
    debug     => 0,         # --debug: verbose debugging output
    grindargs => '',        # --grindargs: extra args for cachegrind
    fields    => undef,     # --fields: subset of fields to display
    jobs      => 1,         # -j/--jobs: max parallel cachegrind processes
    norm      => 0,         # --norm: which perl column is the 100% baseline
    perlargs  => '',        # --perlargs: extra args for every perl
    raw       => 0,         # --raw: show counts rather than percentages
    read      => undef,     # -r/--read: file(s) of previously saved results
    show      => undef,     # --show: human-readable output (defaulted below)
    sort      => undef,     # --sort=field:perl: test ordering
    tests     => undef,     # --tests: subset of tests to run/read
    verbose   => 0,         # -v/--verbose: progress information
    write     => undef,     # -w/--write: save raw results to file
);
475
476
477# process command-line args and call top-level action
478
{
    # Parse switches into %OPTS. 'require_order' (set in the Getopt::Long
    # import above) stops parsing at the first non-switch, so @ARGV is
    # left holding the perl executables plus their per-executable
    # --args/--env switches.
    GetOptions(
        'action=s'    => \$OPTS{action},
        'average'     => \$OPTS{average},
        'autolabel'   => \$OPTS{autolabel},
        'benchfile=s' => \$OPTS{benchfile},
        'bisect=s'    => \$OPTS{bisect},
        'compact=s'   => \$OPTS{compact},
        'debug'       => \$OPTS{debug},
        'grindargs=s' => \$OPTS{grindargs},
        'help|h'      => \$OPTS{help},
        'fields=s'    => \$OPTS{fields},
        'jobs|j=i'    => \$OPTS{jobs},
        'norm=s'      => \$OPTS{norm},
        'perlargs=s'  => \$OPTS{perlargs},
        'raw'         => \$OPTS{raw},
        'read|r=s@'   => \$OPTS{read},
        'show'        => \$OPTS{show},
        'sort=s'      => \$OPTS{sort},
        'tests=s'     => \$OPTS{tests},
        'v|verbose'   => \$OPTS{verbose},
        'write|w=s'   => \$OPTS{write},
    ) or die "Use the -h option for usage information.\n";

    usage if $OPTS{help};


    # Saved-result files are JSON; load the parser now rather than
    # failing after the (potentially long) benchmark runs.
    if (defined $OPTS{read} or defined $OPTS{write}) {
        # fail early if it's not present
        require JSON::PP;
    }

    # Convert the comma-separated --fields list into a lookup hash,
    # rejecting any name not in %VALID_FIELDS.
    if (defined $OPTS{fields}) {
        my @f = split /,/, $OPTS{fields};
        for (@f) {
            die "Error: --fields: unknown field '$_'\n"
                unless $VALID_FIELDS{$_};
        }
        my %f = map { $_ => 1 } @f;
        $OPTS{fields} = \%f;
    }

    my %valid_actions = qw(grind 1 selftest 1);
    unless ($valid_actions{$OPTS{action}}) {
        die "Error: unrecognised action '$OPTS{action}'\n"
          . "must be one of: " . join(', ', sort keys %valid_actions)."\n";
    }

    # Split --sort=field:perl into its two components; the field is
    # validated here, the perl column later via select_a_perl().
    if (defined $OPTS{sort}) {
        my @s = split /:/, $OPTS{sort};
        if (@s != 2) {
            die "Error: --sort argument should be of the form field:perl: "
              . "'$OPTS{sort}'\n";
        }
        my ($field, $perl) = @s;
        die "Error: --sort: unknown field '$field'\n"
            unless $VALID_FIELDS{$field};
        # the 'perl' value will be validated later, after we have processed
        # the perls
        $OPTS{'sort-field'} = $field;
        $OPTS{'sort-perl'}  = $perl;
    }

    # show is the default output action
    $OPTS{show} = 1 unless $OPTS{write} || $OPTS{bisect};

    # Dispatch to the selected top-level action.
    if ($OPTS{action} eq 'grind') {
        do_grind(\@ARGV);
    }
    elsif ($OPTS{action} eq 'selftest') {
        if (@ARGV) {
            die "Error: no perl executables may be specified with selftest\n"
        }
        do_selftest();
    }
}
exit 0;
556
557
558# Given a hash ref keyed by test names, filter it by deleting unwanted
559# tests, based on $OPTS{tests}.
560
# Given a hash ref keyed by test names, delete the entries not selected
# by $OPTS{tests}. A no-op when --tests wasn't given. Dies if --tests
# names a non-existent test, or if nothing would be left to run.
# (Also removes a leftover unused lexical, 'my @tests'.)

sub filter_tests {
    my ($tests) = @_;

    my $opt = $OPTS{tests};
    return unless defined $opt;

    if ($opt =~ m{^/}) {
        # --tests=/regex/: keep only the test names matching the pattern
        $opt =~ s{^/(.+)/$}{$1}
            or die "Error: --tests regex must be of the form /.../\n";
        for (keys %$tests) {
            delete $tests->{$_} unless /$opt/;
        }
    }
    else {
        # --tests=foo,bar,...: every listed name must exist; keep only those
        my %t;
        for (split /,/, $opt) {
            $t{$_} = 1;
            next if exists $tests->{$_};

            my $e = "Error: no such test found: '$_'\n";
            if ($OPTS{verbose}) {
                $e .= "Valid test names are:\n";
                $e .= "  $_\n" for sort keys %$tests;
            }
            else {
                $e .= "Re-run with --verbose for a list of valid tests.\n";
            }
            die $e;
        }
        for (keys %$tests) {
            delete $tests->{$_} unless exists $t{$_};
        }
    }
    die "Error: no tests to run\n" unless %$tests;
}
598
599
600# Read in the test file, and filter out any tests excluded by $OPTS{tests}
601# return a hash ref { testname => { test }, ... }
602# and an array ref of the original test names order,
603
# Load the benchmarks file (perl code returning an array ref of
# name => spec-hash pairs), validate it, filter it by $OPTS{tests},
# and return:
#   - a hash ref { testname => { spec }, ... }
#   - an array ref of the test names in their original file order

sub read_tests_file {
    my ($file) = @_;

    my $pairs;
    {
        local @INC = ('.');     # let 'do' find a relative path
        $pairs = do $file;
    }
    unless ($pairs) {
        # distinguish "ran but returned false" / "syntax error" / "unreadable"
        die "Error: can't load '$file': code didn't return a true value\n"
                if defined $pairs;
        die "Error: can't parse '$file':\n$@\n" if $@;
        die "Error: can't read '$file': $!\n";
    }

    # Validate every (name, spec) pair, and default each test's
    # description to its code snippet.
    {
        my %valid_key = map { $_ => 1 } qw(desc setup code pre post compile);

        if (!@$pairs || @$pairs % 2) {
            die "Error: '$file' does not contain evenly paired test names and hashes\n";
        }

        for (my $i = 0; $i < @$pairs; $i += 2) {
            my ($name, $spec) = @$pairs[$i, $i + 1];

            die "Error: '$file': invalid test name: '$name'\n"
                unless $name =~ /^[a-zA-Z]\w*(::\w+)*$/;

            for my $key (sort keys %$spec) {
                die "Error: '$file': invalid key '$key' for test '$name'\n"
                    unless exists $valid_key{$key};
            }

            # make description default to the code
            $spec->{desc} = $spec->{code} unless exists $spec->{desc};
        }
    }

    # Record the file's original test ordering (the even-indexed names).
    my @orig_order;
    for (my $i = 0; $i < @$pairs; $i += 2) {
        push @orig_order, $pairs->[$i];
    }

    my $tests = { @$pairs };
    filter_tests($tests);
    return $tests, \@orig_order;
}
654
655
656# Process the perl name/label/column argument of options like --norm and
657# --sort.  Return the index of the matching perl.
658
# Resolve the perl name/label/column argument of options like --norm and
# --sort ($who names the option, for error messages). $perls is a list
# of [ exe, label, ... ] entries. Accepts a non-negative column index,
# a negative index counting from the right, an executable path, or a
# label; returns the index of the matching perl, or dies.

sub select_a_perl {
    my ($which, $perl_list, $option_name) = @_;
    $perl_list ||= [];
    my $count = @$perl_list;

    # negative column number: -1 is the right-most column
    if ($which =~ /^-([0-9]+)$/) {
        my $from_end = $1;
        die "Error: $option_name value $which outside range -1..-$count\n"
            if $from_end < 1 || $from_end > $count;
        return $count - $from_end;
    }

    # plain column number
    if ($which =~ /^[0-9]+$/) {
        die "Error: $option_name value $which outside range 0.."
              . $#$perl_list . "\n"
            unless $which < $count;
        return $which;
    }

    # otherwise match against executable names and labels
    my @matches =
        grep { $perl_list->[$_][0] eq $which || $perl_list->[$_][1] eq $which }
             0 .. $#$perl_list;

    unless (@matches) {
        my $known = '';
        for my $entry (@$perl_list) {
            $known .= "    $entry->[1]";
            $known .= "  $entry->[0]" if $entry->[0] ne $entry->[1];
            $known .= "\n";
        }
        die "Error: $option_name: unrecognised perl '$which'\n"
          . "Valid perl names are:\n$known";
    }

    die "Error: $option_name: ambiguous perl '$which'\n" if @matches > 1;
    return $matches[0];
}
695
696
697# Validate the list of perl executables on the command line.
698# The general form is
699#
700#      a_perl_exe[=label] [ --args='perl args'] [ --env='FOO=foo' ]
701#
702# Return a list of [ exe, label, {env}, 'args' ] tuples
703
# Validate the list of perl executables (plus optional per-executable
# --args/--env switches) left on the command line after option parsing.
# $read_perls is the [exe, label, ...] list obtained from --read files,
# used only for label-uniqueness checks. Returns a list of
# [ exe, label, {env}, 'args' ] tuples. Dies on unknown switches,
# malformed --env values, duplicate labels, or unrunnable executables.
#
# Fixes: removed a dead outer 'my @labels' (and its push) that was
# shadowed by the autolabel block's own lexical and never read, and
# corrected the misspelled loop label STRIP_SUFFFIX -> STRIP_SUFFIX.

sub process_executables_list {
    my ($read_perls, @cmd_line_args) = @_;

    my @results; # returned, each item is [ perlexe, label, {env}, 'args' ]
    # labels already claimed by results read in via --read
    my %seen_from_reads = map { $_->[1] => 1 } @$read_perls;
    my %seen;    # labels claimed by executables earlier on the command line

    while (@cmd_line_args) {
        my $item = shift @cmd_line_args;

        # --args=... / --env=... attach to the preceding executable
        if ($item =~ /^--(.*)$/) {
            my ($switch, $val) = split /=/, $1, 2;
            die "Error: unrecognised executable switch '--$switch'\n"
                unless $switch =~  /^(args|env)$/;

            die "Error: --$switch without a preceding executable name\n"
                unless @results;

            # allow both '--args=foo' and '--args foo'
            unless (defined $val) {
                $val = shift @cmd_line_args;
                die "Error: --$switch is missing value\n"
                    unless defined $val;
            }

            if ($switch eq 'args') {
                $results[-1][3] .= " $val";
            }
            else {
                # --env: must be of the form --env=VARIABLE=value
                $val =~ /^(\w+)=(.*)$/
                    or die "Error: --env is missing =value\n";
                $results[-1][2]{$1} = $2;
            }

            next;
        }

        # whatever is left must be the name of an executable

        my ($perl, $label) = split /=/, $item, 2;
        unless ($OPTS{autolabel}) {
            $label //= $perl;
            # a leading '+' means: append the label to the binary name
            $label = $perl.$label if $label =~ /^\+/;
        }

        die "Error: duplicate label '$label': "
                        . "each executable must have a unique label\n"
            if defined $label && $seen{$label}++;

        die "Error: duplicate label '$label': "
                        . "seen both in --read file and on command line\n"
            if defined $label && $seen_from_reads{$label};

        # sanity check that the executable can actually be run
        my $r = qx($perl -e 'print qq(ok\n)' 2>&1);
        die "Error: unable to execute '$perl': $r\n" if $r ne "ok\n";

        push @results, [ $perl, $label,  { }, '' ];
    }

    # make args '' by default (defensive: entries above are always
    # pushed with all four fields populated)
    for (@results) {
        push @$_, '' unless @$_ > 3;
    }

    if ($OPTS{autolabel}) {

        # create a list of [ 'perl-path', $i ] pairs for all
        # $results[$i] which don't have a label
        my @labels;
        for (0..$#results)  {
            push @labels, [ $results[$_][0], $_ ]
                        unless defined $results[$_][1];
        }

        if (@labels) {
            # strip off common prefixes
            my $pre = '';
          STRIP_PREFIX:
            while (length $labels[0][0]) {
                my $c = substr($labels[0][0], 0, 1);
                for my $i (1..$#labels) {
                    last STRIP_PREFIX if substr($labels[$i][0], 0, 1) ne $c;
                }
                substr($labels[$_][0], 0, 1)  = '' for 0..$#labels;
                $pre .= $c;
            }
            # add back any final "version-ish" prefix
            $pre =~ s/^.*?([0-9\.]*)$/$1/;
            substr($labels[$_][0], 0, 0) = $pre for 0..$#labels;

            # strip off common suffixes
            my $post = '';
          STRIP_SUFFIX:
            while (length $labels[0][0]) {
                my $c = substr($labels[0][0], -1, 1);
                for my $i (1..$#labels) {
                    last STRIP_SUFFIX if substr($labels[$i][0], -1, 1) ne $c;
                }
                chop $labels[$_][0] for 0..$#labels;
                $post = "$c$post";
            }
            # add back any initial "version-ish" suffix
            $post =~ s/^([0-9\.]*).*$/$1/;
            $labels[$_][0] .= $post for 0..$#labels;

            # avoid degenerate empty string for single executable name
            $labels[0][0] = '0' if @labels == 1 && !length $labels[0][0];

            # if the auto-generated labels are plain integers, prefix
            # them with 'p' (for perl) to distinguish them from column
            # indices (otherwise e.g. --norm=2 is ambiguous)

            if ($labels[0][0] =~ /^\d*$/) {
                $labels[$_][0] = "p$labels[$_][0]" for 0..$#labels;
            }

            # now de-duplicate labels

            my (%seen, %index);
            $seen{$read_perls->[$_][1]}++ for 0..$#$read_perls;
            $seen{$labels[$_][0]}++ for 0..$#labels;

            for my $i (0..$#labels)  {
                my $label = $labels[$i][0];
                next unless $seen{$label} > 1;
                my $d = length($label) ? '-' : '';
                my $n = $index{$label} // 0;
                $n++ while exists $seen{"$label$d$n"};
                $labels[$i][0] .= "$d$n";
                $index{$label} = $n + 1;
            }

            # finally, store them
            $results[$_->[1]][1]= $_->[0] for @labels;
        }
    }


    return @results;
}
846
847
848
849# Return a string containing a perl program which runs the benchmark code
850# $ARGV[0] times. If $body is true, include the main body (setup) in
851# the loop; otherwise create an empty loop with just pre and post.
852# Note that an empty body is handled with '1;' so that a completely empty
853# loop has a single nextstate rather than a stub op, so more closely
854# matches the active loop; e.g.:
855#   {1;}    => nextstate;                       unstack
856#   {$x=1;} => nextstate; const; gvsv; sassign; unstack
857# Note also that each statement is prefixed with a label; this avoids
858# adjacent nextstate ops being optimised away.
859#
860# A final 1; statement is added so that the code is always in void
861# context.
862#
863# It the compile flag is set for a test, the body of the loop is wrapped in
864# eval 'sub { .... }' to measure compile time rather than execution time
865
# Build the text of a perl program which runs the benchmark code
# $ARGV[0] times. When $active is true the real body (code) goes in the
# loop; otherwise the loop gets the placeholder '1' so the empty loop
# still has a nextstate op. Each statement carries a label so adjacent
# nextstates aren't optimised away; a trailing '1;' keeps the loop body
# in void context. With the test's 'compile' flag set, the loop body is
# wrapped in eval q{sub {...}} so compile time is measured instead.

sub make_perl_prog {
    my ($name, $test, $active) = @_;

    my $desc  = $test->{desc};
    my $setup = $test->{setup} // '';
    my $pre   = defined $test->{pre}  ? "_PRE_: $test->{pre}; "   : "";
    my $post  = defined $test->{post} ? "_POST_: $test->{post}; " : "";
    my $code  = $active ? $test->{code} : "1";

    my $loop_body = $pre . "_CODE_: $code; " . $post . " _CXT_: 1; ";
    $loop_body = "eval q{sub { $loop_body }};" if $test->{compile};

    return <<EOF;
# $desc
package $name;
BEGIN { srand(0) }
$setup;
for my \$__loop__ (1..\$ARGV[0]) {
    $loop_body
}
EOF
}
889
890
891# Parse the output from cachegrind. Return a hash ref.
892# See do_selftest() for examples of the output format.
893
# Parse the summary output from a cachegrind run ($id and $perl identify
# the run for error messages). Returns a hash ref keyed by the names in
# %VALID_FIELDS, with the comma-separated counts reduced to plain
# integers. Dies on unrecognisable lines or missing fields.
# See do_selftest() for examples of the output format.

sub parse_cachegrind {
    my ($output, $id, $perl) = @_;

    my %fields;

    for my $line (split /\n/, $output) {
        # every line should start with an ==pid== or --pid-- marker
        unless ($line =~ s/(==\d+==)|(--\d+--) //) {
            die "Error: while executing $id:\n"
              . "unexpected code or cachegrind output:\n$line\n";
        }

        if ($line =~ /I\s+refs:\s+([\d,]+)/) {
            $fields{Ir} = $1;
        }
        elsif ($line =~ /I1\s+misses:\s+([\d,]+)/) {
            $fields{Ir_m1} = $1;
        }
        elsif ($line =~ /LLi\s+misses:\s+([\d,]+)/) {
            $fields{Ir_mm} = $1;
        }
        elsif ($line =~ /D\s+refs:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @fields{qw(Dr Dw)} = ($1, $2);
        }
        elsif ($line =~ /D1\s+misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @fields{qw(Dr_m1 Dw_m1)} = ($1, $2);
        }
        elsif ($line =~ /LLd\s+misses:\s+.*?([\d,]+) rd .*?([\d,]+) wr/) {
            @fields{qw(Dr_mm Dw_mm)} = ($1, $2);
        }
        elsif ($line =~ /Branches:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
            @fields{qw(COND IND)} = ($1, $2);
        }
        elsif ($line =~ /Mispredicts:\s+.*?([\d,]+) cond .*?([\d,]+) ind/) {
            @fields{qw(COND_m IND_m)} = ($1, $2);
        }
    }

    # every known field must have been seen; strip thousands separators
    for my $field (keys %VALID_FIELDS) {
        die "Error: can't parse '$field' field from cachegrind output:\n$output"
            unless exists $fields{$field};
        $fields{$field} =~ s/,//g;
    }

    return \%fields;
}
939
940
# Handle the 'grind' action: optionally read in saved results, run
# cachegrind for each perl listed on the command line, then emit the
# three forms of output (--write JSON file, --show tables, --bisect
# exit status).
#
# $cmd_line_args is the residue of @ARGV after option processing:
# a list of perl executables, each optionally suffixed with '=label'.

sub do_grind {
    my ($cmd_line_args) = @_; # the residue of @ARGV after option processing

    my ($loop_counts, $perls, $results, $tests, $order, @run_perls);
    my ($bisect_field, $bisect_min, $bisect_max);
    my ($done_read, $processed, $averages, %seen_labels);

    # Validate the --bisect=field,min,max option up front.

    if (defined $OPTS{bisect}) {
        ($bisect_field, $bisect_min, $bisect_max) = split /,/, $OPTS{bisect}, 3;
        die "Error: --bisect option must be of form 'field,integer,integer'\n"
            unless
                    defined $bisect_max
                and $bisect_min =~ /^[0-9]+$/
                and $bisect_max =~ /^[0-9]+$/;

        die "Error: unrecognised field '$bisect_field' in --bisect option\n"
            unless $VALID_FIELDS{$bisect_field};

        die "Error: --bisect min ($bisect_min) must be <= max ($bisect_max)\n"
            if $bisect_min > $bisect_max;
    }

    # Read in previous benchmark results

    foreach my $file (@{$OPTS{read}}) {
        open my $in, '<:encoding(UTF-8)', $file
            or die "Error: can't open '$file' for reading: $!\n";
        my $data = do { local $/; <$in> };
        close $in;

        my $hash = JSON::PP::decode_json($data);
        # Refuse files written by a newer bench.pl than this one.
        if (int($FORMAT_VERSION) < int($hash->{version})) {
            die "Error: unsupported version $hash->{version} in file"
              . " '$file' (too new)\n";
        }
        my ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order) =
            @$hash{qw(loop_counts perls results tests order)};

        # check file contents for consistency
        my $k_o = join ';', sort @$read_order;
        my $k_r = join ';', sort keys %$read_results;
        my $k_t = join ';', sort keys %$read_tests;
        die "File '$file' contains no results\n" unless length $k_r;
        die "File '$file' contains differing test and results names\n"
            unless $k_r eq $k_t;
        die "File '$file' contains differing test and sort order names\n"
            unless $k_o eq $k_t;

        # delete tests not matching --tests= criteria, if any
        filter_tests($read_results);
        filter_tests($read_tests);

        # Perl labels are the keys used to merge results, so they must
        # be unique across all the files read.
        for my $perl (@$read_perls) {
            my $label = $perl->[1];
            die "Error: duplicate label '$label': seen in file '$file'\n"
                if exists $seen_labels{$label};
            $seen_labels{$label}++;
        }

        if (!$done_read) {
            # First file read: use its contents as the baseline.
            ($loop_counts, $perls, $results, $tests, $order) =
                ($read_loop_counts, $read_perls, $read_results, $read_tests, $read_order);
            $done_read = 1;
        }
        else {
            # merge results across multiple files

            # Merging is only meaningful if every file ran the same set
            # of tests ...
            if (   join(';', sort keys %$tests)
                ne join(';', sort keys %$read_tests))
            {
                my $err = "Can't merge multiple read files: "
                        . "they contain differing test sets.\n";
                if ($OPTS{verbose}) {
                    $err .= "Previous tests:\n";
                    $err .= "  $_\n" for sort keys %$tests;
                    $err .= "tests from '$file':\n";
                    $err .= "  $_\n" for sort keys %$read_tests;
                }
                else {
                    $err .= "Re-run with --verbose to see the differences.\n";
                }
                die $err;
            }

            # ... with the same pair of loop counts.
            if ("@$read_loop_counts" ne "@$loop_counts") {
                die "Can't merge multiple read files: differing loop counts:\n"
                . "  (previous=(@$loop_counts), "
                . "'$file'=(@$read_loop_counts))\n";
            }

            push @$perls, @{$read_perls};
            foreach my $test (keys %{$read_results}) {
                foreach my $label (keys %{$read_results->{$test}}) {
                    $results->{$test}{$label}= $read_results->{$test}{$label};
                }
            }
        }
    }
    die "Error: --benchfile cannot be used when --read is present\n"
        if $done_read && defined $OPTS{benchfile};

    # Gather list of perls to benchmark:

    if (@$cmd_line_args) {
        unless ($done_read) {
            # How many times to execute the loop for the two trials. The lower
            # value is intended to do the loop enough times that branch
            # prediction has taken hold; the higher loop allows us to see the
            # branch misses after that
            $loop_counts = [10, 20];

            ($tests, $order) =
                read_tests_file($OPTS{benchfile} // 't/perf/benchmarks');
        }

        @run_perls = process_executables_list($perls, @$cmd_line_args);
        push @$perls, @run_perls;
    }

    # strip @$order to just the actual tests present
    $order = [ grep exists $tests->{$_}, @$order ];

    # Now we know what perls and tests we have, do extra option processing
    # and checking (done before grinding, so time isn't wasted if we die).

    if (!$perls or !@$perls) {
        die "Error: nothing to do: no perls to run, no data to read.\n";
    }
    if (@$perls < 2 and $OPTS{show} and !$OPTS{raw}) {
        die "Error: need at least 2 perls for comparison.\n"
    }

    if ($OPTS{bisect}) {
        die "Error: exactly one perl executable must be specified for bisect\n"
            unless @$perls == 1;
        die "Error: only a single test may be specified with --bisect\n"
            unless keys %$tests == 1;
    }

    # Resolve the --norm / --sort / --compact perl selectors into
    # indices into @$perls.

    $OPTS{norm} = select_a_perl($OPTS{norm}, $perls, "--norm");

    if (defined $OPTS{'sort-perl'}) {
        $OPTS{'sort-perl'} =
                select_a_perl($OPTS{'sort-perl'}, $perls, "--sort");
    }

    if (defined $OPTS{'compact'}) {
        $OPTS{'compact'} =
                select_a_perl($OPTS{'compact'}, $perls, "--compact");
    }


    # Run the benchmarks; accumulate with any previously read results.

    if (@run_perls) {
        $results = grind_run($tests, $order, \@run_perls, $loop_counts, $results);
    }


    # Handle the 3 forms of output

    # 1) raw results saved as JSON, suitable for a later --read

    if (defined $OPTS{write}) {
        my $json = JSON::PP::encode_json({
                    version      => $FORMAT_VERSION,
                    loop_counts  => $loop_counts,
                    perls        => $perls,
                    results      => $results,
                    tests        => $tests,
                    order        => $order,
                });

        open my $out, '>:encoding(UTF-8)', $OPTS{write}
            or die "Error: can't open '$OPTS{write}' for writing: $!\n";
        print $out $json or die "Error: writing to file '$OPTS{write}': $!\n";
        close $out       or die "Error: closing file '$OPTS{write}': $!\n";
    }

    if ($OPTS{show} or $OPTS{bisect}) {
        # numerically process the raw data
        ($processed, $averages) =
                    grind_process($results, $perls, $loop_counts);
    }

    # 2) tabulated results on stdout

    if ($OPTS{show}) {
        if (defined $OPTS{compact}) {
            grind_print_compact($processed, $averages, $OPTS{compact},
                                $perls, $tests, $order);
        }
        else {
            grind_print($processed, $averages, $perls, $tests, $order);
        }
    }

    # 3) exit status for use as e.g. a 'git bisect run' predicate:
    # success iff the single test's field value lies within [min, max].

    if ($OPTS{bisect}) {
        # these panics shouldn't happen if the bisect checks above are sound
        my @r = values %$results;
        die "Panic: expected exactly one test result in bisect\n"
                                                        if @r != 1;
        @r = values %{$r[0]};
        die "Panic: expected exactly one perl result in bisect\n"
                                                        if @r != 1;
        my $c = $r[0]{$bisect_field};
        die "Panic: no result in bisect for field '$bisect_field'\n"
                                                        unless defined $c;

        print "Bisect: $bisect_field had the value $c\n";

        exit 0 if $bisect_min <= $c and $c <= $bisect_max;
        exit 1;
    }
}
1154
1155
# Run cachegrind for every test/perl combo.
# It may run several processes in parallel when -j is specified.
# Return a hash ref suitable for input to grind_process()
#
# $tests:   hash ref of test specs, keyed by test name
# $order:   array ref giving the order in which to run the tests
# $perls:   array ref of [ exe, label, env, args ] tuples to benchmark
# $counts:  [ short, long ] loop iteration counts
# $results: hash ref of previously read results to accumulate into
#           (may be undef)

sub grind_run {
    my ($tests, $order, $perls, $counts, $results) = @_;

    # Build a list of all the jobs to run

    my @jobs;

    for my $test (grep $tests->{$_}, @$order) {

        # Create two test progs: one with an empty loop and one with code.
        my @prog = (
            make_perl_prog($test, $tests->{$test}, 0),
            make_perl_prog($test, $tests->{$test}, 1),
        );

        for my $p (@$perls) {
            my ($perl, $label, $env, $args) = @$p;

            # Run both the empty loop and the active loop
            # $counts->[0] and $counts->[1] times.

            for my $i (0,1) {
                for my $j (0,1) {
                    # Build an env-var prefix from the test's env spec;
                    # keys are sorted for reproducible command lines.
                    my $envstr = '';
                    if (ref $env) {
                        $envstr .= "$_=$env->{$_} " for sort keys %$env;
                    }
                    # PERL_HASH_SEED=0 keeps hash layout (and hence the
                    # instruction counts) reproducible between runs.
                    my $cmd = "PERL_HASH_SEED=0 $envstr"
                            . "valgrind --tool=cachegrind  --branch-sim=yes --cache-sim=yes "
                            . "--cachegrind-out-file=/dev/null "
                            . "$OPTS{grindargs} "
                            . "$perl $OPTS{perlargs} $args - $counts->[$j] 2>&1";
                    # for debugging and error messages
                    my $id = "$test/$label "
                        . ($i ? "active" : "empty") . "/"
                        . ($j ? "long"   : "short") . " loop";

                    push @jobs, {
                        test   => $test,
                        perl   => $perl,
                        plabel => $label,
                        cmd    => $cmd,
                        prog   => $prog[$i],
                        active => $i,
                        loopix => $j,
                        id     => $id,
                    };
                }
            }
        }
    }

    # Execute each cachegrind and store the results in %results.

    # A child may exit before we finish writing its stdin; don't let
    # SIGPIPE kill the whole run.
    local $SIG{PIPE} = 'IGNORE';

    my $max_jobs = $OPTS{jobs};
    my $running  = 0; # count of executing jobs
    my %pids;         # map pids to jobs
    my %fds;          # map fds  to jobs
    my $select = IO::Select->new();

    # Progress bookkeeping for the --verbose ETA display.
    my $njobs     = scalar @jobs;
    my $donejobs  = 0;
    my $starttime = time();

    while (@jobs or $running) {

        if ($OPTS{debug}) {
            printf "Main loop: pending=%d running=%d\n",
                scalar(@jobs), $running;
        }

        # Start new jobs

        while (@jobs && $running < $max_jobs) {
            my $job = shift @jobs;
            my ($id, $cmd) =@$job{qw(id cmd)};

            my ($in, $out, $pid);
            $donejobs++;
            if($OPTS{verbose}) {
                my $donefrac = $donejobs / $njobs;
                my $eta = "";
                # Once we've done at least 20% we'll have a good estimate of
                # the total runtime, hence ETA
                if($donefrac >= 0.2) {
                    my $now = time();
                    my $duration  = ($now - $starttime) / $donefrac;
                    my $remaining = ($starttime + $duration) - $now;
                    $eta = sprintf ", remaining %d:%02d",
                        $remaining / 60, $remaining % 60;
                }
                warn sprintf "Starting %s (%d of %d, %.2f%%%s)\n",
                    $id, $donejobs, $njobs, 100 * $donefrac, $eta;
            }
            eval { $pid = IPC::Open2::open2($out, $in, $cmd); 1; }
                or die "Error: while starting cachegrind subprocess"
                   ." for $id:\n$@";
            $running++;
            $pids{$pid}    = $job;
            $fds{"$out"}   = $job;
            $job->{out_fd} = $out;
            $job->{output} = '';
            $job->{pid}    = $pid;

            # Reads are non-blocking and multiplexed via $select below.
            $out->blocking(0);
            $select->add($out);

            if ($OPTS{debug}) {
                print "Started pid $pid for $id\n";
            }

            # Note:
            # In principle we should write to $in in the main select loop,
            # since it may block. In reality,
            #  a) the code we write to the perl process's stdin is likely
            #     to be less than the OS's pipe buffer size;
            #  b) by the time the perl process has read in all its stdin,
            #     the only output it should have generated is a few lines
            #     of cachegrind output preamble.
            # If these assumptions change, then perform the following print
            # in the select loop instead.

            print $in $job->{prog};
            close $in;
        }

        # Get output of running jobs

        if ($OPTS{debug}) {
            printf "Select: waiting on (%s)\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid},
                            $select->handles;
        }

        my @ready = $select->can_read;

        if ($OPTS{debug}) {
            printf "Select: pids (%s) ready\n",
                join ', ', sort { $a <=> $b } map $fds{$_}{pid}, @ready;
        }

        unless (@ready) {
            die "Panic: select returned no file handles\n";
        }

        for my $fd (@ready) {
            my $j = $fds{"$fd"};
            # Append up to 8K of new output onto what we have so far.
            my $r = sysread $fd, $j->{output}, 8192, length($j->{output});
            unless (defined $r) {
                die "Panic: Read from process running $j->{id} gave:\n$!";
            }
            # A non-zero read means the job may have more output to come.
            next if $r;

            # EOF

            if ($OPTS{debug}) {
                print "Got eof for pid $fds{$fd}{pid} ($j->{id})\n";
            }

            $select->remove($j->{out_fd});
            close($j->{out_fd})
                or die "Panic: closing output fh on $j->{id} gave:\n$!\n";
            $running--;
            delete $fds{"$j->{out_fd}"};
            my $output = $j->{output};

            if ($OPTS{debug}) {
                my $p = $j->{prog};
                $p =~ s/^/    : /mg;
                my $o = $output;
                $o =~ s/^/    : /mg;

                print "\n$j->{id}/\nCommand: $j->{cmd}\n"
                    . "Input:\n$p"
                    . "Output\n$o";
            }

            $results->{$j->{test}}{$j->{plabel}}[$j->{active}][$j->{loopix}]
                    = parse_cachegrind($output, $j->{id}, $j->{perl});
        }

        # Reap finished jobs

        while (1) {
            my $kid = waitpid(-1, WNOHANG);
            my $ret = $?;
            last if $kid <= 0;

            unless (exists $pids{$kid}) {
                die "Panic: reaped unexpected child $kid";
            }
            my $j = $pids{$kid};
            # Any non-zero exit status aborts the whole run.
            if ($ret) {
                die sprintf("Error: $j->{id} gave return status 0x%04x\n", $ret)
                    . "with the following output\n:$j->{output}\n";
            }
            delete $pids{$kid};
        }
    }

    return $results;
}
1364
1365
1366
1367
# grind_process(): process the data that has been extracted from
# cachegrind's output.
#
# $res is of the form ->{benchmark_name}{perl_label}[active][count]{field_name},
# where active is 0 or 1 indicating an empty or active loop,
# count is 0 or 1 indicating a short or long loop. E.g.
#
#    $res->{'expr::assign::scalar_lex'}{perl-5.21.1}[0][1]{Dw_mm}
#
# The $res data structure is modified in-place by this sub.
#
# $perls is [ [ perl-exe, perl-label], .... ].
#
# $counts is [ N, M ] indicating the counts for the short and long loops.
#
#
# return \%output, \%averages, where
#
# $output{benchmark_name}{perl_label}{field_name} = N
# $averages{perl_label}{field_name} = M
#
# where N is the raw count ($OPTS{raw}), or count_perl0/count_perlI otherwise;
# M is the average raw count over all tests ($OPTS{raw}), or
# 1/(sum(count_perlI/count_perl0)/num_tests) otherwise.

sub grind_process {
    my ($res, $perls, $counts) = @_;

    # Process the four results for each test/perf combo:
    # Convert
    #    $res->{benchmark_name}{perl_label}[active][count]{field_name} = n
    # to
    #    $res->{benchmark_name}{perl_label}{field_name} = averaged_n
    #
    # $r[0][1] - $r[0][0] is the time to do ($counts->[1]-$counts->[0])
    #                     empty loops, eliminating startup time
    # $r[1][1] - $r[1][0] is the time to do ($counts->[1]-$counts->[0])
    #                     active loops, eliminating startup time
    # (the two startup times may be different because different code
    # is being compiled); the difference of the two results above
    # divided by the count difference is the time to execute the
    # active code once, eliminating both startup and loop overhead.

    for my $tests (values %$res) {
        for my $r (values %$tests) {
            my $r2;
            for (keys %{$r->[0][0]}) {
                my $n = (  ($r->[1][1]{$_} - $r->[1][0]{$_})
                         - ($r->[0][1]{$_} - $r->[0][0]{$_})
                        ) / ($counts->[1] - $counts->[0]);
                $r2->{$_} = $n;
            }
            # $r aliases the hash value, so this assignment replaces the
            # [active][count] AoA with the flat per-field hash in-place.
            $r = $r2;
        }
    }

    my %totals;   # $totals{perl}{field}: accumulator for the averages
    my %counts;   # $counts{perl}{field}: how many tests went into the total
    my %data;     # the per-test output values to return

    my $perl_norm = $perls->[$OPTS{norm}][1]; # the label of the reference perl

    for my $test_name (keys %$res) {
        my $res1 = $res->{$test_name};
        my $res2_norm = $res1->{$perl_norm};
        for my $perl (keys %$res1) {
            my $res2 = $res1->{$perl};
            for my $field (keys %$res2) {
                # $p: reference perl's count; $q: this perl's count.
                my ($p, $q) = ($res2_norm->{$field}, $res2->{$field});

                if ($OPTS{raw}) {
                    # Avoid annoying '-0.0' displays. Ideally this number
                    # should never be negative, but fluctuations in
                    # startup etc can theoretically make this happen
                    $q = 0 if ($q <= 0 && $q > -0.1);
                    $totals{$perl}{$field} += $q;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $q;
                    next;
                }

                # $p and $q are notionally integer counts, but
                # due to variations in startup etc, it's possible for a
                # count which is supposedly zero to be calculated as a
                # small positive or negative value.
                # In this case, set it to zero. Further below we
                # special-case zeros to avoid division by zero errors etc.

                $p = 0.0 if $p < 0.01;
                $q = 0.0 if $q < 0.01;

                if ($p == 0.0 && $q == 0.0) {
                    # Both perls gave a count of zero, so no change:
                    # treat as 100%
                    $totals{$perl}{$field} += 1;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = 1;
                }
                elsif ($p == 0.0 || $q == 0.0) {
                    # If either count is zero, there were too few events
                    # to give a meaningful ratio (and we will end up with
                    # division by zero if we try). Mark the result undef,
                    # indicating that it shouldn't be displayed; and skip
                    # adding to the average
                    $data{$test_name}{$perl}{$field} = undef;
                }
                else {
                    # For averages, we record q/p rather than p/q.
                    # Consider a test where perl_norm took 1000 cycles
                    # and perlN took 800 cycles. For the individual
                    # results we display p/q, or 1.25; i.e. a quarter
                    # quicker. For the averages, we instead sum all
                    # the 0.8's, which gives the total cycles required to
                    # execute all tests, with all tests given equal
                    # weight. Later we reciprocate the final result,
                    # i.e. 1/(sum(qi/pi)/n)

                    $totals{$perl}{$field} += $q/$p;
                    $counts{$perl}{$field}++;
                    $data{$test_name}{$perl}{$field} = $p/$q;
                }
            }
        }
    }

    # Calculate averages based on %totals and %counts accumulated earlier.

    my %averages;
    for my $perl (keys %totals) {
        my $t = $totals{$perl};
        for my $field (keys %$t) {
            $averages{$perl}{$field} = $OPTS{raw}
                ? $t->{$field} / $counts{$perl}{$field}
                  # reciprocal - see comments above
                : $counts{$perl}{$field} / $t->{$field};
        }
    }

    return \%data, \%averages;
}
1508
1509
1510
# Print the standard explanatory blurb that precedes every grind
# display: a key to the abbreviated cachegrind field names, followed by
# a note on how to interpret the numbers (raw counts under --raw,
# otherwise percentages relative to the --norm perl).

sub grind_blurb {
    my ($perls) = @_;

    # The key text contains nothing to interpolate, so use a
    # non-interpolating heredoc.
    print <<'KEY';
Key:
    Ir   Instruction read
    Dr   Data read
    Dw   Data write
    COND conditional branches
    IND  indirect branches
    _m   branch predict miss
    _m1  level 1 cache miss
    _mm  last cache (e.g. L3) miss
    -    indeterminate percentage (e.g. 1/0)

KEY

    unless ($OPTS{raw}) {
        my $norm_label = $perls->[$OPTS{norm}][1];
        print <<"EXPLAIN";
The numbers represent relative counts per loop iteration, compared to
$norm_label at 100.0%.
Higher is better: for example, using half as many instructions gives 200%,
while using twice as many gives 50%.
EXPLAIN
    }
    else {
        print "The numbers represent raw counts per loop iteration.\n";
    }
}
1542
1543
# Return the list of test names to display, in display order, with the
# pseudo-test 'AVERAGE' appended. With --average, only 'AVERAGE' is
# returned; with --sort-field, tests are ordered by that field's value
# under the --sort perl; otherwise the benchfile order is kept.

sub sorted_test_names {
    my ($results, $order, $perls) = @_;

    my @list;
    if (!$OPTS{average}) {
        if (defined $OPTS{'sort-field'}) {
            # Numeric sort on the chosen field, as measured under the
            # perl selected with --sort.
            my $field  = $OPTS{'sort-field'};
            my $perlix = $OPTS{'sort-perl'};
            my $label  = $perls->[$perlix][1];
            @list = sort {
                        $results->{$a}{$label}{$field}
                    <=> $results->{$b}{$label}{$field}
                    } keys %$results;
        }
        else {
            # Keep the declared order, skipping tests with no results.
            @list = grep { $results->{$_} } @$order;
        }
    }

    # No point in displaying average for only one test.
    push @list, 'AVERAGE' unless @list == 1;
    return @list;
}
1570
1571
# Format one data cell for display, right-justified in $width columns.
#
# $val:   the processed value for one test/perl/field combination, or
#         undef when no meaningful ratio exists (see grind_process)
# $width: minimum field width
#
# Returns the formatted string: '-' for undef, 'Inf' for implausibly
# large magnitudes (e.g. the result of 1 / 0.000001), the raw count to
# one decimal place under --raw, otherwise a percentage to two places.

sub grind_format_cell {
    my ($val, $width) = @_;
    # (an unused 'my $s' was removed here)
    if (!defined $val) {
        return sprintf "%*s", $width, '-';
    }
    elsif (abs($val) >= 1_000_000) {
        # avoid displaying very large numbers (which might be the
        # result of e.g. 1 / 0.000001)
        return sprintf "%*s", $width, 'Inf';
    }
    elsif ($OPTS{raw}) {
        return sprintf "%*.1f", $width, $val;
    }
    else {
        # relative value: display as a percentage
        return sprintf "%*.2f", $width, $val * 100;
    }
}
1592
# grind_print(): display the tabulated results of all the cachegrinds.
#
# Arguments are of the form:
#    $results->{benchmark_name}{perl_label}{field_name} = N
#    $averages->{perl_label}{field_name} = M
#    $perls = [ [ perl-exe, perl-label ], ... ]
#    $tests->{test_name}{desc => ..., ...}
#    $order = [ 'foo::bar1', ... ]  # order to display tests

sub grind_print {
    my ($results, $averages, $perls, $tests, $order) = @_;

    # (unused @perl_names and %perl_labels locals removed; only the
    # labels are needed for display and as result-hash keys)
    my @perl_labels = map $_->[1], @$perls;

    # Print standard header.
    grind_blurb($perls);

    my @test_names = sorted_test_names($results, $order, $perls);

    # The fields to display, in display order; may be restricted by
    # the --fields option.
    my @fields = qw(Ir Dr Dw COND IND
                     COND_m IND_m
                     Ir_m1 Dr_m1 Dw_m1
                     Ir_mm Dr_mm Dw_mm
                  );

    if ($OPTS{fields}) {
        @fields = grep exists $OPTS{fields}{$_}, @fields;
    }

    # If only a single field is to be displayed, use a more compact
    # format with only a single line of output per test.

    my $one_field = @fields == 1;

    # The width of column 0: this is either field names, or for
    # $one_field, test names

    my $width0 = 0;
    for ($one_field ? @test_names : @fields) {
        $width0 = length if length > $width0;
    }

    # Calculate the widths of the data columns: each must be wide
    # enough for both the perl's label and its widest formatted value.

    my @widths = map length, @perl_labels;

    for my $test (@test_names) {
        my $res = ($test eq 'AVERAGE') ? $averages : $results->{$test};
        for my $field (@fields) {
            for my $i (0..$#widths) {
                my $l = length grind_format_cell(
                                    $res->{$perl_labels[$i]}{$field}, 1);
                $widths[$i] = $l if $l > $widths[$i];
            }
        }
    }

    # Print the results for each test

    for my $test (0..$#test_names) {
        my $test_name = $test_names[$test];
        my $doing_ave = ($test_name eq 'AVERAGE');
        my $res = $doing_ave ? $averages : $results->{$test_name};

        # print per-test header

        if ($one_field) {
            print "\nResults for field $fields[0]\n\n" if $test == 0;
        }
        else {
            print "\n$test_name";
            print "\n$tests->{$test_name}{desc}" unless $doing_ave;
            print "\n\n";
        }

        # Print the perl executable names header.

        if (!$one_field || $test == 0) {
            # Two rows: the labels, then a matching row of dashes.
            for my $i (0,1) {
                print " " x $width0;
                for (0..$#widths) {
                    printf " %*s", $widths[$_],
                        $i ? ('-' x$widths[$_]) : $perl_labels[$_];
                }
                print "\n";
            }
        }

        my $field_suffix = '';

        # print a line of data

        for my $field (@fields) {
            if ($one_field) {
                printf "%-*s", $width0, $test_name;
            }
            else {
                # If there are enough fields, print a blank line
                # between groups of fields that have the same suffix
                if (@fields > 4) {
                    my $s = '';
                    $s = $1 if $field =~ /(_\w+)$/;
                    print "\n" if $s ne $field_suffix;
                    $field_suffix = $s;
                }
                printf "%*s", $width0, $field;
            }

            for my $i (0..$#widths) {
                print " ", grind_format_cell($res->{$perl_labels[$i]}{$field},
                                            $widths[$i]);
            }
            print "\n";
        }
    }
}
1712
1713
1714
# grind_print_compact(): like grind_print(), but display a single perl
# in a compact form: one row per test, one column per field. The extra
# arg $which_perl is the index into @$perls of the perl to display.
#
# Arguments are of the form:
#    $results->{benchmark_name}{perl_label}{field_name} = N
#    $averages->{perl_label}{field_name} = M
#    $perls = [ [ perl-exe, perl-label ], ... ]
#    $tests->{test_name}{desc => ..., ...}
#    $order = [ 'foo::bar1', ... ]  # order to display tests

sub grind_print_compact {
    my ($results, $averages, $which_perl, $perls, $tests, $order) = @_;

    # Standard explanatory preamble.
    grind_blurb($perls);

    my $perl_label = $perls->[$which_perl][1];
    print "\nResults for $perl_label\n\n";

    my @test_names = sorted_test_names($results, $order, $perls);

    # The fields to display, in display order; may be restricted by
    # the --fields option.

    my @fields = qw( Ir Dr Dw
                     COND IND
                     COND_m IND_m
                     Ir_m1 Dr_m1 Dw_m1
                     Ir_mm Dr_mm Dw_mm
                   );
    if ($OPTS{fields}) {
        @fields = grep { exists $OPTS{fields}{$_} } @fields;
    }

    # Width of the widest test name, used to line up the descriptions.

    my $name_width = 0;
    for my $name (@test_names) {
        $name_width = length $name if length($name) > $name_width;
    }

    # Each data column must fit both its field name and every one of
    # its formatted values.

    my @widths = map { length } @fields;

    for my $name (@test_names) {
        my $row = ($name eq 'AVERAGE') ? $averages : $results->{$name};
        $row = $row->{$perl_label};
        for my $i (0..$#fields) {
            my $w = length grind_format_cell($row->{$fields[$i]}, 1);
            $widths[$i] = $w if $w > $widths[$i];
        }
    }

    # Header: the field names, then a matching row of dashes.

    for my $i (0..$#fields) {
        printf " %*s", $widths[$i], $fields[$i];
    }
    print "\n";
    for my $w (@widths) {
        printf " %*s", $w, ('-' x $w);
    }
    print "\n";

    # One line per test: the values, then the name and description.

    for my $name (@test_names) {
        my $is_average = ($name eq 'AVERAGE');
        my $row = $is_average ? $averages : $results->{$name};
        $row = $row->{$perl_label};
        my $desc = $is_average
            ? $name
            : sprintf "%-*s   %s", $name_width, $name,
                                 $tests->{$name}{desc};

        for my $i (0..$#fields) {
            print " ", grind_format_cell($row->{$fields[$i]}, $widths[$i]);
        }
        print "  $desc\n";
    }
}
1792
1793
# do_selftest(): check that we can parse known cachegrind()
# output formats. If the output of cachegrind changes, add a *new*
# test here; keep the old tests to make sure we continue to parse
# old cachegrinds
#
# @tests is a flat list of (description, cachegrind-output,
# expected-fields-hashref) triples; each triple yields one is() test
# per field in %VALID_FIELDS.

sub do_selftest {

    my @tests = (
        'standard',
        <<'EOF',
==32350== Cachegrind, a cache and branch-prediction profiler
==32350== Copyright (C) 2002-2013, and GNU GPL'd, by Nicholas Nethercote et al.
==32350== Using Valgrind-3.9.0 and LibVEX; rerun with -h for copyright info
==32350== Command: perl5211o /tmp/uiS2gjdqe5 1
==32350==
--32350-- warning: L3 cache found, using its data for the LL simulation.
==32350==
==32350== I   refs:      1,124,055
==32350== I1  misses:        5,573
==32350== LLi misses:        3,338
==32350== I1  miss rate:      0.49%
==32350== LLi miss rate:      0.29%
==32350==
==32350== D   refs:        404,275  (259,191 rd   + 145,084 wr)
==32350== D1  misses:        9,608  (  6,098 rd   +   3,510 wr)
==32350== LLd misses:        5,794  (  2,781 rd   +   3,013 wr)
==32350== D1  miss rate:       2.3% (    2.3%     +     2.4%  )
==32350== LLd miss rate:       1.4% (    1.0%     +     2.0%  )
==32350==
==32350== LL refs:          15,181  ( 11,671 rd   +   3,510 wr)
==32350== LL misses:         9,132  (  6,119 rd   +   3,013 wr)
==32350== LL miss rate:        0.5% (    0.4%     +     2.0%  )
==32350==
==32350== Branches:        202,372  (197,050 cond +   5,322 ind)
==32350== Mispredicts:      19,153  ( 17,742 cond +   1,411 ind)
==32350== Mispred rate:        9.4% (    9.0%     +    26.5%   )
EOF
        {
            COND    =>  197050,
            COND_m  =>   17742,
            Dr      =>  259191,
            Dr_m1   =>    6098,
            Dr_mm   =>    2781,
            Dw      =>  145084,
            Dw_m1   =>    3510,
            Dw_mm   =>    3013,
            IND     =>    5322,
            IND_m   =>    1411,
            Ir      => 1124055,
            Ir_m1   =>    5573,
            Ir_mm   =>    3338,
        },
    );

    # Load the test harness (test.pl), whose location depends on
    # whether we're run from the top-level directory or from t/.
    for ('./t', '.') {
        my $t = "$_/test.pl";
        next unless  -f $t;
        require $t;
    }
    # One is() per field per (desc, output, expected) triple.
    plan(@tests / 3 * keys %VALID_FIELDS);

    while (@tests) {
        my $desc     = shift @tests;
        my $output   = shift @tests;
        my $expected = shift @tests;
        my $p = parse_cachegrind($output);
        for (sort keys %VALID_FIELDS) {
            is($p->{$_}, $expected->{$_}, "$desc, $_");
        }
    }
}
1865