xref: /openbsd-src/gnu/usr.bin/perl/Porting/valgrindpp.pl (revision 5486feefcc8cb79b19e014ab332cc5dfd05b3b33)
1#!/usr/bin/perl
2
3use IO::File ();
4use File::Find qw(find);
5use Text::Wrap qw(wrap);
6use Getopt::Long qw(GetOptions);
7use Pod::Usage qw(pod2usage);
8use Cwd qw(cwd);
9use File::Spec;
10use strict;
11
12my %opt = (
13  frames  => 3,
14  lines   => 0,
15  tests   => 0,
16  top     => 0,
17  verbose => 0,
18);
19
20GetOptions(\%opt, qw(
21            dir=s
22            frames=i
23            hide=s@
24            lines!
25            output-file=s
26            tests!
27            top=i
28            verbose+
29          )) or pod2usage(2);
30
31# Setup the directory to process
32if (exists $opt{dir}) {
33  $opt{dir} = File::Spec->canonpath($opt{dir});
34}
35else {
36  # Check if we're in 't'
37  $opt{dir} = cwd =~ /\/t$/ ? '..' : '.';
38
39  # Check if we're in the right directory
40  -d "$opt{dir}/$_" or die "$0: must be run from the perl source directory"
41                         . " when --dir is not given\n"
42      for qw(t lib ext);
43}
44
45# Assemble regex for functions whose leaks should be hidden
46# (no, a hash won't be significantly faster)
47my $hidden = do { local $"='|'; $opt{hide} ? qr/^(?:@{$opt{hide}})$/o : '' };
48
49# Setup our output file handle
50# (do it early, as it may fail)
51my $fh = \*STDOUT;
52if (exists $opt{'output-file'}) {
53  $fh = IO::File->new($opt{'output-file'}, 'w')
54        or die "$0: cannot open $opt{'output-file'} ($!)\n";
55}
56
57# These hashes will receive the error and leak summary data:
58#
59# %error = (
60#   error_name => {
61#                   stack_frame => {
62#                                    test_script => occurrences
63#                                  }
64#                 }
65# );
66#
67# %leak = (
68#   leak_type => {
69#                  stack_frames => {
70#                                    test_script => occurrences
71#                                  }
72#                } # stack frames are separated by '<'s
73# );
74my(%error, %leak);
75
76# Collect summary data
77find({wanted => \&filter, no_chdir => 1}, $opt{dir});
78
79# Format the output nicely
80$Text::Wrap::columns = 80;
81$Text::Wrap::unexpand = 0;
82
83# Write summary
84summary($fh, \%error, \%leak);
85
86exit 0;
87
88sub summary {
89  my($fh, $error, $leak) = @_;
90  my(%ne, %nl, %top);
91
92  # Prepare the data
93
94  for my $e (keys %$error) {
95    for my $f (keys %{$error->{$e}}) {
96      my($func, $file, $line) = split /:/, $f;
97      my $nf = $opt{lines} ? "$func ($file:$line)" : "$func ($file)";
98      $ne{$e}{$nf}{count}++;
99      while (my($k,$v) = each %{$error->{$e}{$f}}) {
100        $ne{$e}{$nf}{tests}{$k} += $v;
101        $top{$k}{error}++;
102      }
103    }
104  }
105
106  for my $l (keys %$leak) {
107    for my $s (keys %{$leak->{$l}}) {
108      my $ns = join '<', map {
109                 my($func, $file, $line) = split /:/;
110                 /:/ ? $opt{lines}
111                       ? "$func ($file:$line)" : "$func ($file)"
112                     : $_
113               } split /</, $s;
114      $nl{$l}{$ns}{count}++;
115      while (my($k,$v) = each %{$leak->{$l}{$s}}) {
116        $nl{$l}{$ns}{tests}{$k} += $v;
117        $top{$k}{leak}++;
118      }
119    }
120  }
121
122  # Print the Top N
123
124  if ($opt{top}) {
125    for my $what (qw(error leak)) {
126      my @t = sort { $top{$b}{$what} <=> $top{$a}{$what} or $a cmp $b }
127              grep $top{$_}{$what}, keys %top;
128      @t > $opt{top} and splice @t, $opt{top};
129      my $n = @t;
130      my $s = $n > 1 ? 's' : '';
131      my $prev = 0;
132      print $fh "Top $n test scripts for ${what}s:\n\n";
133      for my $i (1 .. $n) {
134        $n = $top{$t[$i-1]}{$what};
135        $s = $n > 1 ? 's' : '';
136        printf $fh "    %3s %-40s %3d $what$s\n",
137                   $n != $prev ? "$i." : '', $t[$i-1], $n;
138        $prev = $n;
139      }
140      print $fh "\n";
141    }
142  }
143
144  # Print the real summary
145
146  print $fh "MEMORY ACCESS ERRORS\n\n";
147
148  for my $e (sort keys %ne) {
149    print $fh qq("$e"\n);
150    for my $frame (sort keys %{$ne{$e}}) {
151      my $data = $ne{$e}{$frame};
152      my $count = $data->{count} > 1 ? " [$data->{count} paths]" : '';
153      print $fh ' 'x4, "$frame$count\n",
154                format_tests($data->{tests}), "\n";
155    }
156    print $fh "\n";
157  }
158
159  print $fh "\nMEMORY LEAKS\n\n";
160
161  for my $l (sort keys %nl) {
162    print $fh qq("$l"\n);
163    for my $frames (sort keys %{$nl{$l}}) {
164      my $data = $nl{$l}{$frames};
165      my @stack = split /</, $frames;
166      $data->{count} > 1 and $stack[-1] .= " [$data->{count} paths]";
167      print $fh join('', map { ' 'x4 . "$_:$stack[$_]\n" } 0 .. $#stack ),
168                format_tests($data->{tests}), "\n\n";
169    }
170  }
171}
172
173sub format_tests {
174  my $tests = shift;
175  my $indent = ' 'x8;
176
177  if ($opt{tests}) {
178    return wrap($indent, $indent, join ', ', sort keys %$tests);
179  }
180  else {
181    my $count = keys %$tests;
182    my $s = $count > 1 ? 's' : '';
183    return $indent . "triggered by $count test$s";
184  }
185}
186
187sub filter {
188  debug(2, "$File::Find::name\n");
189
190  # Only process '*.t.valgrind' files
191  /(.*)\.t\.valgrind$/ or return;
192
193  # Strip all unnecessary stuff from the test name
194  my $test = $1;
195  $test =~ s/^(?:(?:\Q$opt{dir}\E|[.t])\/)+//;
196
197  debug(1, "processing $test ($_)\n");
198
199  # Get all the valgrind output lines
200  my @l = do {
201    my $fh = IO::File->new($_, 'r') or die "$0: cannot open $_ ($!)\n";
202    # Process outputs can interrupt each other, so sort by pid first
203    my %pid; local $_;
204    while (<$fh>) {
205      chomp;
206      s/^==(\d+)==\s?// and push @{$pid{$1}}, $_;
207    }
208    map @$_, values %pid;
209  };
210
211  # Setup some useful regexes
212  my $hexaddr  = '0x[[:xdigit:]]+';
213  my $topframe = qr/^\s+at $hexaddr:\s+/;
214  my $address  = qr/^\s+Address $hexaddr is \d+ bytes (?:before|inside|after) a block of size \d+/;
215  my $leak     = qr/^\s*\d+ bytes in \d+ blocks are (still reachable|(?:definite|possib)ly lost)/;
216
217  for my $i (0 .. $#l) {
218    $l[$i]   =~ $topframe or next; # Match on any topmost frame...
219    $l[$i-1] =~ $address and next; # ...but not if it's only address details
220    my $line = $l[$i-1]; # The error / leak description line
221    my $j    = $i;
222
223    if ($line =~ $leak) {
224      debug(2, "LEAK: $line\n");
225
226      my $type   = $1;     # Type of leak (still reachable, ...)
227      my $inperl = 0;      # Are we inside the perl source? (And how deep?)
228      my @stack;           # Call stack
229
230      while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(\w+)\s+\((?:([^:]+):(\d+)|[^)]+)\)/o) {
231        my($func, $file, $lineno) = ($1, $2, $3);
232
233        # If the stack frame is inside perl => increment $inperl
234        # If we've already been inside perl, but are no longer => leave
235        defined $file && ++$inperl or $inperl && last;
236
237        # A function that should be hidden? => clear stack and leave
238        $hidden && $func =~ $hidden and @stack = (), last;
239
240        # Add stack frame if it's within our threshold
241        if ($inperl <= $opt{frames}) {
242          push @stack, $inperl ? "$func:$file:$lineno" : $func;
243        }
244      }
245
246      # If there's something on the stack and we've seen perl code,
247      # add this memory leak to the summary data
248      @stack and $inperl and $leak{$type}{join '<', @stack}{$test}++;
249    } else {
250      debug(1, "ERROR: $line\n");
251
252      # Simply find the topmost frame in the call stack within
253      # the perl source code
254      while ($l[$j++] =~ /^\s+(?:at|by) $hexaddr:\s+(?:(\w+)\s+\(([^:]+):(\d+)\))?/o) {
255        if (defined $1) {
256          $error{$line}{"$1:$2:$3"}{$test}++;
257          last;
258        }
259      }
260    }
261  }
262}
263
264sub debug {
265  my $level = shift;
266  $opt{verbose} >= $level and print STDERR @_;
267}
268
269__END__
270
271=head1 NAME
272
273valgrindpp.pl - A post processor for C<make test.valgrind>
274
275=head1 SYNOPSIS
276
277valgrindpp.pl [B<--dir>=I<dir>] [B<--frames>=I<number>]
278[B<--hide>=I<identifier>] [B<--lines>]
279[B<--output-file>=I<file>] [B<--tests>]
280[B<--top>=I<number>] [B<--verbose>]
281
282=head1 DESCRIPTION
283
284B<valgrindpp.pl> is a post processor for I<.valgrind> files
285created during C<make test.valgrind>. It collects all these
286files, extracts most of the information and produces a
287significantly shorter summary of all detected memory access
288errors and memory leaks.
289
290=head1 OPTIONS
291
292=over 4
293
294=item B<--dir>=I<dir>
295
296Recursively process I<.valgrind> files in I<dir>. If this
297options is not given, B<valgrindpp.pl> must be run from
298either the perl source or the I<t> directory and will process
299all I<.valgrind> files within the distribution.
300
301=item B<--frames>=I<number>
302
303Number of stack frames within the perl source code to
304consider when distinguishing between memory leak sources.
305Increasing this value will give you a longer backtrace,
306while decreasing the number will show you fewer sources
307for memory leaks. The default is 3 frames.
308
309=item B<--hide>=I<identifier>
310
311Hide all memory leaks that have I<identifier> in their backtrace.
312Useful if you want to hide leaks from functions that are known to
313have lots of memory leaks. I<identifier> can also be a regular
314expression, in which case all leaks with symbols matching the
315expression are hidden. Can be given multiple times.
316
317=item B<--lines>
318
319Show line numbers for stack frames. This is useful for further
320increasing the error/leak resolution, but makes it harder to
321compare different reports using I<diff>.
322
323=item B<--output-file>=I<file>
324
325Redirect the output into I<file>. If this option is not
326given, the output goes to I<stdout>.
327
328=item B<--tests>
329
330List all tests that trigger memory access errors or memory
331leaks explicitly instead of only printing a count.
332
333=item B<--top>=I<number>
334
335List the top I<number> test scripts for memory access errors
336and memory leaks. Set to C<0> for no top-I<n> statistics.
337
338=item B<--verbose>
339
340Increase verbosity level. Can be given multiple times.
341
342=back
343
344=head1 COPYRIGHT
345
346Copyright 2003 by Marcus Holland-Moritz <mhx@cpan.org>.
347
348This program is free software; you may redistribute it
349and/or modify it under the same terms as Perl itself.
350
351=cut
352