xref: /netbsd-src/external/bsd/jemalloc/dist/bin/jeprof.in (revision 7bdf38e5b7a28439665f2fdeff81e36913eef7dd)
1#! /usr/bin/env perl
2
3# Copyright (c) 1998-2007, Google Inc.
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions are
8# met:
9#
10#     * Redistributions of source code must retain the above copyright
11# notice, this list of conditions and the following disclaimer.
12#     * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following disclaimer
14# in the documentation and/or other materials provided with the
15# distribution.
16#     * Neither the name of Google Inc. nor the names of its
17# contributors may be used to endorse or promote products derived from
18# this software without specific prior written permission.
19#
20# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32# ---
33# Program for printing the profile generated by common/profiler.cc,
34# or by the heap profiler (common/debugallocation.cc)
35#
36# The profile contains a sequence of entries of the form:
37#       <count> <stack trace>
38# This program parses the profile, and generates user-readable
39# output.
40#
41# Examples:
42#
43# % tools/jeprof "program" "profile"
44#   Enters "interactive" mode
45#
46# % tools/jeprof --text "program" "profile"
47#   Generates one line per procedure
48#
49# % tools/jeprof --gv "program" "profile"
50#   Generates annotated call-graph and displays via "gv"
51#
52# % tools/jeprof --gv --focus=Mutex "program" "profile"
53#   Restrict to code paths that involve an entry that matches "Mutex"
54#
55# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile"
56#   Restrict to code paths that involve an entry that matches "Mutex"
57#   and does not match "string"
58#
59# % tools/jeprof --list=IBF_CheckDocid "program" "profile"
60#   Generates disassembly listing of all routines with at least one
61#   sample that match the --list=<regexp> pattern.  The listing is
62#   annotated with the flat and cumulative sample counts at each line.
63#
64# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile"
65#   Generates disassembly listing of all routines with at least one
66#   sample that match the --disasm=<regexp> pattern.  The listing is
67#   annotated with the flat and cumulative sample counts at each PC value.
68#
69# TODO: Use color to indicate files?
70
71use strict;
72use warnings;
73use Getopt::Long;
74use Cwd;
75
76my $JEPROF_VERSION = "@jemalloc_version@";
77my $PPROF_VERSION = "2.0";
78
79# These are the object tools we use which can come from a
80# user-specified location using --tools, from the JEPROF_TOOLS
81# environment variable, or from the environment.
82my %obj_tool_map = (
83  "objdump" => "objdump",
84  "nm" => "nm",
85  "addr2line" => "addr2line",
86  "c++filt" => "c++filt",
87  ## ConfigureObjTools may add architecture-specific entries:
88  #"nm_pdb" => "nm-pdb",       # for reading windows (PDB-format) executables
89  #"addr2line_pdb" => "addr2line-pdb",                                # ditto
90  #"otool" => "otool",         # equivalent of objdump on OS X
91);
92# NOTE: these are lists, so you can put in commandline flags if you want.
93my @DOT = ("dot");          # leave non-absolute, since it may be in /usr/local
94my @GV = ("gv");
95my @EVINCE = ("evince");    # could also be xpdf or perhaps acroread
96my @KCACHEGRIND = ("kcachegrind");
97my @PS2PDF = ("ps2pdf");
98# These are used for dynamic profiles
99my @URL_FETCHER = ("curl", "-s", "--fail");
100
101# These are the web pages that servers need to support for dynamic profiles
102my $HEAP_PAGE = "/pprof/heap";
103my $PROFILE_PAGE = "/pprof/profile";   # must support cgi-param "?seconds=#"
104my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param
105                                                # ?seconds=#&event=x&period=n
106my $GROWTH_PAGE = "/pprof/growth";
107my $CONTENTION_PAGE = "/pprof/contention";
108my $WALL_PAGE = "/pprof/wall(?:\\?.*)?";  # accepts options like namefilter
109my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?";
110my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param
111                                                       # "?seconds=#",
112                                                       # "?tags_regexp=#" and
113                                                       # "?type=#".
114my $SYMBOL_PAGE = "/pprof/symbol";     # must support symbol lookup via POST
115my $PROGRAM_NAME_PAGE = "/pprof/cmdline";
116
117# These are the web pages that can be named on the command line.
118# All the alternatives must begin with /.
119my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" .
120               "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" .
121               "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)";
122
123# default binary name
124my $UNKNOWN_BINARY = "(unknown)";
125
126# There is a pervasive dependency on the length (in hex characters,
127# i.e., nibbles) of an address, distinguishing between 32-bit and
128# 64-bit profiles.  To err on the safe size, default to 64-bit here:
129my $address_length = 16;
130
131my $dev_null = "/dev/null";
132if (! -e $dev_null && $^O =~ /MSWin/) {    # $^O is the OS perl was built for
133  $dev_null = "nul";
134}
135
136# A list of paths to search for shared object files
137my @prefix_list = ();
138
139# Special routine name that should not have any symbols.
140# Used as separator to parse "addr2line -i" output.
141my $sep_symbol = '_fini';
142my $sep_address = undef;
143
144##### Argument parsing #####
145
146sub usage_string {
147  return <<EOF;
148Usage:
149jeprof [options] <program> <profiles>
150   <profiles> is a space separated list of profile names.
151jeprof [options] <symbolized-profiles>
152   <symbolized-profiles> is a list of profile files where each file contains
153   the necessary symbol mappings  as well as profile data (likely generated
154   with --raw).
155jeprof [options] <profile>
156   <profile> is a remote form.  Symbols are obtained from host:port$SYMBOL_PAGE
157
158   Each name can be:
159   /path/to/profile        - a path to a profile file
160   host:port[/<service>]   - a location of a service to get profile from
161
162   The /<service> can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile,
163                         $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall,
164                         $CENSUSPROFILE_PAGE, or /pprof/filteredprofile.
165   For instance:
166     jeprof http://myserver.com:80$HEAP_PAGE
167   If /<service> is omitted, the service defaults to $PROFILE_PAGE (cpu profiling).
168jeprof --symbols <program>
169   Maps addresses to symbol names.  In this mode, stdin should be a
170   list of library mappings, in the same format as is found in the heap-
171   and cpu-profile files (this loosely matches that of /proc/self/maps
172   on linux), followed by a list of hex addresses to map, one per line.
173
174   For more help with querying remote servers, including how to add the
175   necessary server-side support code, see this filename (or one like it):
176
177   /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html
178
179Options:
180   --cum               Sort by cumulative data
181   --base=<base>       Subtract <base> from <profile> before display
182   --interactive       Run in interactive mode (interactive "help" gives help) [default]
183   --seconds=<n>       Length of time for dynamic profiles [default=30 secs]
184   --add_lib=<file>    Read additional symbols and line info from the given library
185   --lib_prefix=<dir>  Comma separated list of library path prefixes
186
187Reporting Granularity:
188   --addresses         Report at address level
189   --lines             Report at source line level
190   --functions         Report at function level [default]
191   --files             Report at source file level
192
193Output type:
194   --text              Generate text report
195   --callgrind         Generate callgrind format to stdout
196   --gv                Generate Postscript and display
197   --evince            Generate PDF and display
198   --web               Generate SVG and display
199   --list=<regexp>     Generate source listing of matching routines
200   --disasm=<regexp>   Generate disassembly of matching routines
201   --symbols           Print demangled symbol names found at given addresses
202   --dot               Generate DOT file to stdout
203   --ps                Generate Postcript to stdout
204   --pdf               Generate PDF to stdout
205   --svg               Generate SVG to stdout
206   --gif               Generate GIF to stdout
207   --raw               Generate symbolized jeprof data (useful with remote fetch)
208   --collapsed         Generate collapsed stacks for building flame graphs
209                       (see http://www.brendangregg.com/flamegraphs.html)
210
211Heap-Profile Options:
212   --inuse_space       Display in-use (mega)bytes [default]
213   --inuse_objects     Display in-use objects
214   --alloc_space       Display allocated (mega)bytes
215   --alloc_objects     Display allocated objects
216   --show_bytes        Display space in bytes
217   --drop_negative     Ignore negative differences
218
219Contention-profile options:
220   --total_delay       Display total delay at each region [default]
221   --contentions       Display number of delays at each region
222   --mean_delay        Display mean delay at each region
223
224Call-graph Options:
225   --nodecount=<n>     Show at most so many nodes [default=80]
226   --nodefraction=<f>  Hide nodes below <f>*total [default=.005]
227   --edgefraction=<f>  Hide edges below <f>*total [default=.001]
228   --maxdegree=<n>     Max incoming/outgoing edges per node [default=8]
229   --focus=<regexp>    Focus on backtraces with nodes matching <regexp>
230   --thread=<n>        Show profile for thread <n>
231   --ignore=<regexp>   Ignore backtraces with nodes matching <regexp>
232   --scale=<n>         Set GV scaling [default=0]
233   --heapcheck         Make nodes with non-0 object counts
234                       (i.e. direct leak generators) more visible
235   --retain=<regexp>   Retain only nodes that match <regexp>
236   --exclude=<regexp>  Exclude all nodes that match <regexp>
237
238Miscellaneous:
239   --tools=<prefix or binary:fullpath>[,...]   \$PATH for object tool pathnames
240   --test              Run unit tests
241   --help              This message
242   --version           Version information
243   --debug-syms-by-id  (Linux only) Find debug symbol files by build ID as well as by name
244
245Environment Variables:
246   JEPROF_TMPDIR        Profiles directory. Defaults to \$HOME/jeprof
247   JEPROF_TOOLS         Prefix for object tools pathnames
248
249Examples:
250
251jeprof /bin/ls ls.prof
252                       Enters "interactive" mode
253jeprof --text /bin/ls ls.prof
254                       Outputs one line per procedure
255jeprof --web /bin/ls ls.prof
256                       Displays annotated call-graph in web browser
257jeprof --gv /bin/ls ls.prof
258                       Displays annotated call-graph via 'gv'
259jeprof --gv --focus=Mutex /bin/ls ls.prof
260                       Restricts to code paths including a .*Mutex.* entry
261jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof
262                       Code paths including Mutex but not string
263jeprof --list=getdir /bin/ls ls.prof
264                       (Per-line) annotated source listing for getdir()
265jeprof --disasm=getdir /bin/ls ls.prof
266                       (Per-PC) annotated disassembly for getdir()
267
268jeprof http://localhost:1234/
269                       Enters "interactive" mode
270jeprof --text localhost:1234
271                       Outputs one line per procedure for localhost:1234
272jeprof --raw localhost:1234 > ./local.raw
273jeprof --text ./local.raw
274                       Fetches a remote profile for later analysis and then
275                       analyzes it in text mode.
276EOF
277}
278
279sub version_string {
280  return <<EOF
281jeprof (part of jemalloc $JEPROF_VERSION)
282based on pprof (part of gperftools $PPROF_VERSION)
283
284Copyright 1998-2007 Google Inc.
285
286This is BSD licensed software; see the source for copying conditions
287and license information.
288There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A
289PARTICULAR PURPOSE.
290EOF
291}
292
293sub usage {
294  my $msg = shift;
295  print STDERR "$msg\n\n";
296  print STDERR usage_string();
297  print STDERR "\nFATAL ERROR: $msg\n";    # just as a reminder
298  exit(1);
299}
300
301sub Init() {
302  # Setup tmp-file name and handler to clean it up.
303  # We do this in the very beginning so that we can use
304  # error() and cleanup() function anytime here after.
305  $main::tmpfile_sym = "/tmp/jeprof$$.sym";
306  $main::tmpfile_ps = "/tmp/jeprof$$";
307  $main::next_tmpfile = 0;
308  $SIG{'INT'} = \&sighandler;
309
310  # Cache from filename/linenumber to source code
311  $main::source_cache = ();
312
313  $main::opt_help = 0;
314  $main::opt_version = 0;
315
316  $main::opt_cum = 0;
317  $main::opt_base = '';
318  $main::opt_addresses = 0;
319  $main::opt_lines = 0;
320  $main::opt_functions = 0;
321  $main::opt_files = 0;
322  $main::opt_lib_prefix = "";
323
324  $main::opt_text = 0;
325  $main::opt_callgrind = 0;
326  $main::opt_list = "";
327  $main::opt_disasm = "";
328  $main::opt_symbols = 0;
329  $main::opt_gv = 0;
330  $main::opt_evince = 0;
331  $main::opt_web = 0;
332  $main::opt_dot = 0;
333  $main::opt_ps = 0;
334  $main::opt_pdf = 0;
335  $main::opt_gif = 0;
336  $main::opt_svg = 0;
337  $main::opt_raw = 0;
338  $main::opt_collapsed = 0;
339
340  $main::opt_nodecount = 80;
341  $main::opt_nodefraction = 0.005;
342  $main::opt_edgefraction = 0.001;
343  $main::opt_maxdegree = 8;
344  $main::opt_focus = '';
345  $main::opt_thread = undef;
346  $main::opt_ignore = '';
347  $main::opt_scale = 0;
348  $main::opt_heapcheck = 0;
349  $main::opt_retain = '';
350  $main::opt_exclude = '';
351  $main::opt_seconds = 30;
352  $main::opt_lib = "";
353
354  $main::opt_inuse_space   = 0;
355  $main::opt_inuse_objects = 0;
356  $main::opt_alloc_space   = 0;
357  $main::opt_alloc_objects = 0;
358  $main::opt_show_bytes    = 0;
359  $main::opt_drop_negative = 0;
360  $main::opt_interactive   = 0;
361
362  $main::opt_total_delay = 0;
363  $main::opt_contentions = 0;
364  $main::opt_mean_delay = 0;
365
366  $main::opt_tools   = "";
367  $main::opt_debug   = 0;
368  $main::opt_test    = 0;
369  $main::opt_debug_syms_by_id = 0;
370
371  # These are undocumented flags used only by unittests.
372  $main::opt_test_stride = 0;
373
374  # Are we using $SYMBOL_PAGE?
375  $main::use_symbol_page = 0;
376
377  # Files returned by TempName.
378  %main::tempnames = ();
379
380  # Type of profile we are dealing with
381  # Supported types:
382  #     cpu
383  #     heap
384  #     growth
385  #     contention
386  $main::profile_type = '';     # Empty type means "unknown"
387
388  GetOptions("help!"          => \$main::opt_help,
389             "version!"       => \$main::opt_version,
390             "cum!"           => \$main::opt_cum,
391             "base=s"         => \$main::opt_base,
392             "seconds=i"      => \$main::opt_seconds,
393             "add_lib=s"      => \$main::opt_lib,
394             "lib_prefix=s"   => \$main::opt_lib_prefix,
395             "functions!"     => \$main::opt_functions,
396             "lines!"         => \$main::opt_lines,
397             "addresses!"     => \$main::opt_addresses,
398             "files!"         => \$main::opt_files,
399             "text!"          => \$main::opt_text,
400             "callgrind!"     => \$main::opt_callgrind,
401             "list=s"         => \$main::opt_list,
402             "disasm=s"       => \$main::opt_disasm,
403             "symbols!"       => \$main::opt_symbols,
404             "gv!"            => \$main::opt_gv,
405             "evince!"        => \$main::opt_evince,
406             "web!"           => \$main::opt_web,
407             "dot!"           => \$main::opt_dot,
408             "ps!"            => \$main::opt_ps,
409             "pdf!"           => \$main::opt_pdf,
410             "svg!"           => \$main::opt_svg,
411             "gif!"           => \$main::opt_gif,
412             "raw!"           => \$main::opt_raw,
413             "collapsed!"     => \$main::opt_collapsed,
414             "interactive!"   => \$main::opt_interactive,
415             "nodecount=i"    => \$main::opt_nodecount,
416             "nodefraction=f" => \$main::opt_nodefraction,
417             "edgefraction=f" => \$main::opt_edgefraction,
418             "maxdegree=i"    => \$main::opt_maxdegree,
419             "focus=s"        => \$main::opt_focus,
420             "thread=s"       => \$main::opt_thread,
421             "ignore=s"       => \$main::opt_ignore,
422             "scale=i"        => \$main::opt_scale,
423             "heapcheck"      => \$main::opt_heapcheck,
424             "retain=s"       => \$main::opt_retain,
425             "exclude=s"      => \$main::opt_exclude,
426             "inuse_space!"   => \$main::opt_inuse_space,
427             "inuse_objects!" => \$main::opt_inuse_objects,
428             "alloc_space!"   => \$main::opt_alloc_space,
429             "alloc_objects!" => \$main::opt_alloc_objects,
430             "show_bytes!"    => \$main::opt_show_bytes,
431             "drop_negative!" => \$main::opt_drop_negative,
432             "total_delay!"   => \$main::opt_total_delay,
433             "contentions!"   => \$main::opt_contentions,
434             "mean_delay!"    => \$main::opt_mean_delay,
435             "tools=s"        => \$main::opt_tools,
436             "test!"          => \$main::opt_test,
437             "debug!"         => \$main::opt_debug,
438             "debug-syms-by-id!" => \$main::opt_debug_syms_by_id,
439             # Undocumented flags used only by unittests:
440             "test_stride=i"  => \$main::opt_test_stride,
441      ) || usage("Invalid option(s)");
442
443  # Deal with the standard --help and --version
444  if ($main::opt_help) {
445    print usage_string();
446    exit(0);
447  }
448
449  if ($main::opt_version) {
450    print version_string();
451    exit(0);
452  }
453
454  # Disassembly/listing/symbols mode requires address-level info
455  if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) {
456    $main::opt_functions = 0;
457    $main::opt_lines = 0;
458    $main::opt_addresses = 1;
459    $main::opt_files = 0;
460  }
461
462  # Check heap-profiling flags
463  if ($main::opt_inuse_space +
464      $main::opt_inuse_objects +
465      $main::opt_alloc_space +
466      $main::opt_alloc_objects > 1) {
467    usage("Specify at most on of --inuse/--alloc options");
468  }
469
470  # Check output granularities
471  my $grains =
472      $main::opt_functions +
473      $main::opt_lines +
474      $main::opt_addresses +
475      $main::opt_files +
476      0;
477  if ($grains > 1) {
478    usage("Only specify one output granularity option");
479  }
480  if ($grains == 0) {
481    $main::opt_functions = 1;
482  }
483
484  # Check output modes
485  my $modes =
486      $main::opt_text +
487      $main::opt_callgrind +
488      ($main::opt_list eq '' ? 0 : 1) +
489      ($main::opt_disasm eq '' ? 0 : 1) +
490      ($main::opt_symbols == 0 ? 0 : 1) +
491      $main::opt_gv +
492      $main::opt_evince +
493      $main::opt_web +
494      $main::opt_dot +
495      $main::opt_ps +
496      $main::opt_pdf +
497      $main::opt_svg +
498      $main::opt_gif +
499      $main::opt_raw +
500      $main::opt_collapsed +
501      $main::opt_interactive +
502      0;
503  if ($modes > 1) {
504    usage("Only specify one output mode");
505  }
506  if ($modes == 0) {
507    if (-t STDOUT) {  # If STDOUT is a tty, activate interactive mode
508      $main::opt_interactive = 1;
509    } else {
510      $main::opt_text = 1;
511    }
512  }
513
514  if ($main::opt_test) {
515    RunUnitTests();
516    # Should not return
517    exit(1);
518  }
519
520  # Binary name and profile arguments list
521  $main::prog = "";
522  @main::pfile_args = ();
523
524  # Remote profiling without a binary (using $SYMBOL_PAGE instead)
525  if (@ARGV > 0) {
526    if (IsProfileURL($ARGV[0])) {
527      $main::use_symbol_page = 1;
528    } elsif (IsSymbolizedProfileFile($ARGV[0])) {
529      $main::use_symbolized_profile = 1;
530      $main::prog = $UNKNOWN_BINARY;  # will be set later from the profile file
531    }
532  }
533
534  if ($main::use_symbol_page || $main::use_symbolized_profile) {
535    # We don't need a binary!
536    my %disabled = ('--lines' => $main::opt_lines,
537                    '--disasm' => $main::opt_disasm);
538    for my $option (keys %disabled) {
539      usage("$option cannot be used without a binary") if $disabled{$option};
540    }
541    # Set $main::prog later...
542    scalar(@ARGV) || usage("Did not specify profile file");
543  } elsif ($main::opt_symbols) {
544    # --symbols needs a binary-name (to run nm on, etc) but not profiles
545    $main::prog = shift(@ARGV) || usage("Did not specify program");
546  } else {
547    $main::prog = shift(@ARGV) || usage("Did not specify program");
548    scalar(@ARGV) || usage("Did not specify profile file");
549  }
550
551  # Parse profile file/location arguments
552  foreach my $farg (@ARGV) {
553    if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) {
554      my $machine = $1;
555      my $num_machines = $2;
556      my $path = $3;
557      for (my $i = 0; $i < $num_machines; $i++) {
558        unshift(@main::pfile_args, "$i.$machine$path");
559      }
560    } else {
561      unshift(@main::pfile_args, $farg);
562    }
563  }
564
565  if ($main::use_symbol_page) {
566    unless (IsProfileURL($main::pfile_args[0])) {
567      error("The first profile should be a remote form to use $SYMBOL_PAGE\n");
568    }
569    CheckSymbolPage();
570    $main::prog = FetchProgramName();
571  } elsif (!$main::use_symbolized_profile) {  # may not need objtools!
572    ConfigureObjTools($main::prog)
573  }
574
575  # Break the opt_lib_prefix into the prefix_list array
576  @prefix_list = split (',', $main::opt_lib_prefix);
577
578  # Remove trailing / from the prefixes, in the list to prevent
579  # searching things like /my/path//lib/mylib.so
580  foreach (@prefix_list) {
581    s|/+$||;
582  }
583
584  # Flag to prevent us from trying over and over to use
585  #  elfutils if it's not installed (used only with
586  #  --debug-syms-by-id option).
587  $main::gave_up_on_elfutils = 0;
588}
589
590sub FilterAndPrint {
591  my ($profile, $symbols, $libs, $thread) = @_;
592
593  # Get total data in profile
594  my $total = TotalProfile($profile);
595
596  # Remove uniniteresting stack items
597  $profile = RemoveUninterestingFrames($symbols, $profile);
598
599  # Focus?
600  if ($main::opt_focus ne '') {
601    $profile = FocusProfile($symbols, $profile, $main::opt_focus);
602  }
603
604  # Ignore?
605  if ($main::opt_ignore ne '') {
606    $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore);
607  }
608
609  my $calls = ExtractCalls($symbols, $profile);
610
611  # Reduce profiles to required output granularity, and also clean
612  # each stack trace so a given entry exists at most once.
613  my $reduced = ReduceProfile($symbols, $profile);
614
615  # Get derived profiles
616  my $flat = FlatProfile($reduced);
617  my $cumulative = CumulativeProfile($reduced);
618
619  # Print
620  if (!$main::opt_interactive) {
621    if ($main::opt_disasm) {
622      PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm);
623    } elsif ($main::opt_list) {
624      PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0);
625    } elsif ($main::opt_text) {
626      # Make sure the output is empty when have nothing to report
627      # (only matters when --heapcheck is given but we must be
628      # compatible with old branches that did not pass --heapcheck always):
629      if ($total != 0) {
630        printf("Total%s: %s %s\n",
631               (defined($thread) ? " (t$thread)" : ""),
632               Unparse($total), Units());
633      }
634      PrintText($symbols, $flat, $cumulative, -1);
635    } elsif ($main::opt_raw) {
636      PrintSymbolizedProfile($symbols, $profile, $main::prog);
637    } elsif ($main::opt_collapsed) {
638      PrintCollapsedStacks($symbols, $profile);
639    } elsif ($main::opt_callgrind) {
640      PrintCallgrind($calls);
641    } else {
642      if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
643        if ($main::opt_gv) {
644          RunGV(TempName($main::next_tmpfile, "ps"), "");
645        } elsif ($main::opt_evince) {
646          RunEvince(TempName($main::next_tmpfile, "pdf"), "");
647        } elsif ($main::opt_web) {
648          my $tmp = TempName($main::next_tmpfile, "svg");
649          RunWeb($tmp);
650          # The command we run might hand the file name off
651          # to an already running browser instance and then exit.
652          # Normally, we'd remove $tmp on exit (right now),
653          # but fork a child to remove $tmp a little later, so that the
654          # browser has time to load it first.
655          delete $main::tempnames{$tmp};
656          if (fork() == 0) {
657            sleep 5;
658            unlink($tmp);
659            exit(0);
660          }
661        }
662      } else {
663        cleanup();
664        exit(1);
665      }
666    }
667  } else {
668    InteractiveMode($profile, $symbols, $libs, $total);
669  }
670}
671
672sub Main() {
673  Init();
674  $main::collected_profile = undef;
675  @main::profile_files = ();
676  $main::op_time = time();
677
678  # Printing symbols is special and requires a lot less info that most.
679  if ($main::opt_symbols) {
680    PrintSymbols(*STDIN);   # Get /proc/maps and symbols output from stdin
681    return;
682  }
683
684  # Fetch all profile data
685  FetchDynamicProfiles();
686
687  # this will hold symbols that we read from the profile files
688  my $symbol_map = {};
689
690  # Read one profile, pick the last item on the list
691  my $data = ReadProfile($main::prog, pop(@main::profile_files));
692  my $profile = $data->{profile};
693  my $pcs = $data->{pcs};
694  my $libs = $data->{libs};   # Info about main program and shared libraries
695  $symbol_map = MergeSymbols($symbol_map, $data->{symbols});
696
697  # Add additional profiles, if available.
698  if (scalar(@main::profile_files) > 0) {
699    foreach my $pname (@main::profile_files) {
700      my $data2 = ReadProfile($main::prog, $pname);
701      $profile = AddProfile($profile, $data2->{profile});
702      $pcs = AddPcs($pcs, $data2->{pcs});
703      $symbol_map = MergeSymbols($symbol_map, $data2->{symbols});
704    }
705  }
706
707  # Subtract base from profile, if specified
708  if ($main::opt_base ne '') {
709    my $base = ReadProfile($main::prog, $main::opt_base);
710    $profile = SubtractProfile($profile, $base->{profile});
711    $pcs = AddPcs($pcs, $base->{pcs});
712    $symbol_map = MergeSymbols($symbol_map, $base->{symbols});
713  }
714
715  # Collect symbols
716  my $symbols;
717  if ($main::use_symbolized_profile) {
718    $symbols = FetchSymbols($pcs, $symbol_map);
719  } elsif ($main::use_symbol_page) {
720    $symbols = FetchSymbols($pcs);
721  } else {
722    # TODO(csilvers): $libs uses the /proc/self/maps data from profile1,
723    # which may differ from the data from subsequent profiles, especially
724    # if they were run on different machines.  Use appropriate libs for
725    # each pc somehow.
726    $symbols = ExtractSymbols($libs, $pcs);
727  }
728
729  if (!defined($main::opt_thread)) {
730    FilterAndPrint($profile, $symbols, $libs);
731  }
732  if (defined($data->{threads})) {
733    foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) {
734      if (defined($main::opt_thread) &&
735          ($main::opt_thread eq '*' || $main::opt_thread == $thread)) {
736        my $thread_profile = $data->{threads}{$thread};
737        FilterAndPrint($thread_profile, $symbols, $libs, $thread);
738      }
739    }
740  }
741
742  cleanup();
743  exit(0);
744}
745
746##### Entry Point #####
747
748Main();
749
750# Temporary code to detect if we're running on a Goobuntu system.
751# These systems don't have the right stuff installed for the special
752# Readline libraries to work, so as a temporary workaround, we default
753# to using the normal stdio code, rather than the fancier readline-based
754# code
755sub ReadlineMightFail {
756  if (-e '/lib/libtermcap.so.2') {
757    return 0;  # libtermcap exists, so readline should be okay
758  } else {
759    return 1;
760  }
761}
762
763sub RunGV {
764  my $fname = shift;
765  my $bg = shift;       # "" or " &" if we should run in background
766  if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) {
767    # Options using double dash are supported by this gv version.
768    # Also, turn on noantialias to better handle bug in gv for
769    # postscript files with large dimensions.
770    # TODO: Maybe we should not pass the --noantialias flag
771    # if the gv version is known to work properly without the flag.
772    system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname)
773           . $bg);
774  } else {
775    # Old gv version - only supports options that use single dash.
776    print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n";
777    system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg);
778  }
779}
780
781sub RunEvince {
782  my $fname = shift;
783  my $bg = shift;       # "" or " &" if we should run in background
784  system(ShellEscape(@EVINCE, $fname) . $bg);
785}
786
787sub RunWeb {
788  my $fname = shift;
789  print STDERR "Loading web page file:///$fname\n";
790
791  if (`uname` =~ /Darwin/) {
792    # OS X: open will use standard preference for SVG files.
793    system("/usr/bin/open", $fname);
794    return;
795  }
796
797  # Some kind of Unix; try generic symlinks, then specific browsers.
798  # (Stop once we find one.)
799  # Works best if the browser is already running.
800  my @alt = (
801    "/etc/alternatives/gnome-www-browser",
802    "/etc/alternatives/x-www-browser",
803    "google-chrome",
804    "firefox",
805  );
806  foreach my $b (@alt) {
807    if (system($b, $fname) == 0) {
808      return;
809    }
810  }
811
812  print STDERR "Could not load web browser.\n";
813}
814
815sub RunKcachegrind {
816  my $fname = shift;
817  my $bg = shift;       # "" or " &" if we should run in background
818  print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n";
819  system(ShellEscape(@KCACHEGRIND, $fname) . $bg);
820}
821
822
823##### Interactive helper routines #####
824
825sub InteractiveMode {
826  $| = 1;  # Make output unbuffered for interactive mode
827  my ($orig_profile, $symbols, $libs, $total) = @_;
828
829  print STDERR "Welcome to jeprof!  For help, type 'help'.\n";
830
831  # Use ReadLine if it's installed and input comes from a console.
832  if ( -t STDIN &&
833       !ReadlineMightFail() &&
834       defined(eval {require Term::ReadLine}) ) {
835    my $term = new Term::ReadLine 'jeprof';
836    while ( defined ($_ = $term->readline('(jeprof) '))) {
837      $term->addhistory($_) if /\S/;
838      if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
839        last;    # exit when we get an interactive command to quit
840      }
841    }
842  } else {       # don't have readline
843    while (1) {
844      print STDERR "(jeprof) ";
845      $_ = <STDIN>;
846      last if ! defined $_ ;
847      s/\r//g;         # turn windows-looking lines into unix-looking lines
848
849      # Save some flags that might be reset by InteractiveCommand()
850      my $save_opt_lines = $main::opt_lines;
851
852      if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) {
853        last;    # exit when we get an interactive command to quit
854      }
855
856      # Restore flags
857      $main::opt_lines = $save_opt_lines;
858    }
859  }
860}
861
862# Takes two args: orig profile, and command to run.
863# Returns 1 if we should keep going, or 0 if we were asked to quit
864sub InteractiveCommand {
865  my($orig_profile, $symbols, $libs, $total, $command) = @_;
866  $_ = $command;                # just to make future m//'s easier
867  if (!defined($_)) {
868    print STDERR "\n";
869    return 0;
870  }
871  if (m/^\s*quit/) {
872    return 0;
873  }
874  if (m/^\s*help/) {
875    InteractiveHelpMessage();
876    return 1;
877  }
878  # Clear all the mode options -- mode is controlled by "$command"
879  $main::opt_text = 0;
880  $main::opt_callgrind = 0;
881  $main::opt_disasm = 0;
882  $main::opt_list = 0;
883  $main::opt_gv = 0;
884  $main::opt_evince = 0;
885  $main::opt_cum = 0;
886
887  if (m/^\s*(text|top)(\d*)\s*(.*)/) {
888    $main::opt_text = 1;
889
890    my $line_limit = ($2 ne "") ? int($2) : 10;
891
892    my $routine;
893    my $ignore;
894    ($routine, $ignore) = ParseInteractiveArgs($3);
895
896    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
897    my $reduced = ReduceProfile($symbols, $profile);
898
899    # Get derived profiles
900    my $flat = FlatProfile($reduced);
901    my $cumulative = CumulativeProfile($reduced);
902
903    PrintText($symbols, $flat, $cumulative, $line_limit);
904    return 1;
905  }
906  if (m/^\s*callgrind\s*([^ \n]*)/) {
907    $main::opt_callgrind = 1;
908
909    # Get derived profiles
910    my $calls = ExtractCalls($symbols, $orig_profile);
911    my $filename = $1;
912    if ( $1 eq '' ) {
913      $filename = TempName($main::next_tmpfile, "callgrind");
914    }
915    PrintCallgrind($calls, $filename);
916    if ( $1 eq '' ) {
917      RunKcachegrind($filename, " & ");
918      $main::next_tmpfile++;
919    }
920
921    return 1;
922  }
923  if (m/^\s*(web)?list\s*(.+)/) {
924    my $html = (defined($1) && ($1 eq "web"));
925    $main::opt_list = 1;
926
927    my $routine;
928    my $ignore;
929    ($routine, $ignore) = ParseInteractiveArgs($2);
930
931    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
932    my $reduced = ReduceProfile($symbols, $profile);
933
934    # Get derived profiles
935    my $flat = FlatProfile($reduced);
936    my $cumulative = CumulativeProfile($reduced);
937
938    PrintListing($total, $libs, $flat, $cumulative, $routine, $html);
939    return 1;
940  }
941  if (m/^\s*disasm\s*(.+)/) {
942    $main::opt_disasm = 1;
943
944    my $routine;
945    my $ignore;
946    ($routine, $ignore) = ParseInteractiveArgs($1);
947
948    # Process current profile to account for various settings
949    my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore);
950    my $reduced = ReduceProfile($symbols, $profile);
951
952    # Get derived profiles
953    my $flat = FlatProfile($reduced);
954    my $cumulative = CumulativeProfile($reduced);
955
956    PrintDisassembly($libs, $flat, $cumulative, $routine);
957    return 1;
958  }
959  if (m/^\s*(gv|web|evince)\s*(.*)/) {
960    $main::opt_gv = 0;
961    $main::opt_evince = 0;
962    $main::opt_web = 0;
963    if ($1 eq "gv") {
964      $main::opt_gv = 1;
965    } elsif ($1 eq "evince") {
966      $main::opt_evince = 1;
967    } elsif ($1 eq "web") {
968      $main::opt_web = 1;
969    }
970
971    my $focus;
972    my $ignore;
973    ($focus, $ignore) = ParseInteractiveArgs($2);
974
975    # Process current profile to account for various settings
976    my $profile = ProcessProfile($total, $orig_profile, $symbols,
977                                 $focus, $ignore);
978    my $reduced = ReduceProfile($symbols, $profile);
979
980    # Get derived profiles
981    my $flat = FlatProfile($reduced);
982    my $cumulative = CumulativeProfile($reduced);
983
984    if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) {
985      if ($main::opt_gv) {
986        RunGV(TempName($main::next_tmpfile, "ps"), " &");
987      } elsif ($main::opt_evince) {
988        RunEvince(TempName($main::next_tmpfile, "pdf"), " &");
989      } elsif ($main::opt_web) {
990        RunWeb(TempName($main::next_tmpfile, "svg"));
991      }
992      $main::next_tmpfile++;
993    }
994    return 1;
995  }
996  if (m/^\s*$/) {
997    return 1;
998  }
999  print STDERR "Unknown command: try 'help'.\n";
1000  return 1;
1001}
1002
1003
1004sub ProcessProfile {
1005  my $total_count = shift;
1006  my $orig_profile = shift;
1007  my $symbols = shift;
1008  my $focus = shift;
1009  my $ignore = shift;
1010
1011  # Process current profile to account for various settings
1012  my $profile = $orig_profile;
1013  printf("Total: %s %s\n", Unparse($total_count), Units());
1014  if ($focus ne '') {
1015    $profile = FocusProfile($symbols, $profile, $focus);
1016    my $focus_count = TotalProfile($profile);
1017    printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n",
1018           $focus,
1019           Unparse($focus_count), Units(),
1020           Unparse($total_count), ($focus_count*100.0) / $total_count);
1021  }
1022  if ($ignore ne '') {
1023    $profile = IgnoreProfile($symbols, $profile, $ignore);
1024    my $ignore_count = TotalProfile($profile);
1025    printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n",
1026           $ignore,
1027           Unparse($ignore_count), Units(),
1028           Unparse($total_count),
1029           ($ignore_count*100.0) / $total_count);
1030  }
1031
1032  return $profile;
1033}
1034
1035sub InteractiveHelpMessage {
1036  print STDERR <<ENDOFHELP;
1037Interactive jeprof mode
1038
1039Commands:
1040  gv
1041  gv [focus] [-ignore1] [-ignore2]
1042      Show graphical hierarchical display of current profile.  Without
1043      any arguments, shows all samples in the profile.  With the optional
1044      "focus" argument, restricts the samples shown to just those where
1045      the "focus" regular expression matches a routine name on the stack
1046      trace.
1047
1048  web
1049  web [focus] [-ignore1] [-ignore2]
1050      Like GV, but displays profile in your web browser instead of using
1051      Ghostview. Works best if your web browser is already running.
1052      To change the browser that gets used:
1053      On Linux, set the /etc/alternatives/gnome-www-browser symlink.
1054      On OS X, change the Finder association for SVG files.
1055
1056  list [routine_regexp] [-ignore1] [-ignore2]
1057      Show source listing of routines whose names match "routine_regexp"
1058
1059  weblist [routine_regexp] [-ignore1] [-ignore2]
1060     Displays a source listing of routines whose names match "routine_regexp"
1061     in a web browser.  You can click on source lines to view the
1062     corresponding disassembly.
1063
1064  top [--cum] [-ignore1] [-ignore2]
1065  top20 [--cum] [-ignore1] [-ignore2]
1066  top37 [--cum] [-ignore1] [-ignore2]
1067      Show top lines ordered by flat profile count, or cumulative count
1068      if --cum is specified.  If a number is present after 'top', the
1069      top K routines will be shown (defaults to showing the top 10)
1070
1071  disasm [routine_regexp] [-ignore1] [-ignore2]
1072      Show disassembly of routines whose names match "routine_regexp",
1073      annotated with sample counts.
1074
1075  callgrind
1076  callgrind [filename]
1077      Generates callgrind file. If no filename is given, kcachegrind is called.
1078
1079  help - This listing
1080  quit or ^D - End jeprof
1081
1082For commands that accept optional -ignore tags, samples where any routine in
1083the stack trace matches the regular expression in any of the -ignore
1084parameters will be ignored.
1085
1086Further pprof details are available at this location (or one similar):
1087
1088 /usr/doc/gperftools-$PPROF_VERSION/cpu_profiler.html
1089 /usr/doc/gperftools-$PPROF_VERSION/heap_profiler.html
1090
1091ENDOFHELP
1092}
1093sub ParseInteractiveArgs {
1094  my $args = shift;
1095  my $focus = "";
1096  my $ignore = "";
1097  my @x = split(/ +/, $args);
1098  foreach $a (@x) {
1099    if ($a =~ m/^(--|-)lines$/) {
1100      $main::opt_lines = 1;
1101    } elsif ($a =~ m/^(--|-)cum$/) {
1102      $main::opt_cum = 1;
1103    } elsif ($a =~ m/^-(.*)/) {
1104      $ignore .= (($ignore ne "") ? "|" : "" ) . $1;
1105    } else {
1106      $focus .= (($focus ne "") ? "|" : "" ) . $a;
1107    }
1108  }
1109  if ($ignore ne "") {
1110    print STDERR "Ignoring samples in call stacks that match '$ignore'\n";
1111  }
1112  return ($focus, $ignore);
1113}
1114
1115##### Output code #####
1116
1117sub TempName {
1118  my $fnum = shift;
1119  my $ext = shift;
1120  my $file = "$main::tmpfile_ps.$fnum.$ext";
1121  $main::tempnames{$file} = 1;
1122  return $file;
1123}
1124
1125# Print profile data in packed binary format (64-bit) to standard out
1126sub PrintProfileData {
1127  my $profile = shift;
1128
1129  # print header (64-bit style)
1130  # (zero) (header-size) (version) (sample-period) (zero)
1131  print pack('L*', 0, 0, 3, 0, 0, 0, 1, 0, 0, 0);
1132
1133  foreach my $k (keys(%{$profile})) {
1134    my $count = $profile->{$k};
1135    my @addrs = split(/\n/, $k);
1136    if ($#addrs >= 0) {
1137      my $depth = $#addrs + 1;
1138      # int(foo / 2**32) is the only reliable way to get rid of bottom
1139      # 32 bits on both 32- and 64-bit systems.
1140      print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32));
1141      print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32));
1142
1143      foreach my $full_addr (@addrs) {
1144        my $addr = $full_addr;
1145        $addr =~ s/0x0*//;  # strip off leading 0x, zeroes
1146        if (length($addr) > 16) {
1147          print STDERR "Invalid address in profile: $full_addr\n";
1148          next;
1149        }
1150        my $low_addr = substr($addr, -8);       # get last 8 hex chars
1151        my $high_addr = substr($addr, -16, 8);  # get up to 8 more hex chars
1152        print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr));
1153      }
1154    }
1155  }
1156}
1157
1158# Print symbols and profile data
1159sub PrintSymbolizedProfile {
1160  my $symbols = shift;
1161  my $profile = shift;
1162  my $prog = shift;
1163
1164  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
1165  my $symbol_marker = $&;
1166
1167  print '--- ', $symbol_marker, "\n";
1168  if (defined($prog)) {
1169    print 'binary=', $prog, "\n";
1170  }
1171  while (my ($pc, $name) = each(%{$symbols})) {
1172    my $sep = ' ';
1173    print '0x', $pc;
1174    # We have a list of function names, which include the inlined
1175    # calls.  They are separated (and terminated) by --, which is
1176    # illegal in function names.
1177    for (my $j = 2; $j <= $#{$name}; $j += 3) {
1178      print $sep, $name->[$j];
1179      $sep = '--';
1180    }
1181    print "\n";
1182  }
1183  print '---', "\n";
1184
1185  my $profile_marker;
1186  if ($main::profile_type eq 'heap') {
1187    $HEAP_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
1188    $profile_marker = $&;
1189  } elsif ($main::profile_type eq 'growth') {
1190    $GROWTH_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
1191    $profile_marker = $&;
1192  } elsif ($main::profile_type eq 'contention') {
1193    $CONTENTION_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
1194    $profile_marker = $&;
1195  } else { # elsif ($main::profile_type eq 'cpu')
1196    $PROFILE_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
1197    $profile_marker = $&;
1198  }
1199
1200  print '--- ', $profile_marker, "\n";
1201  if (defined($main::collected_profile)) {
1202    # if used with remote fetch, simply dump the collected profile to output.
1203    open(SRC, "<$main::collected_profile");
1204    while (<SRC>) {
1205      print $_;
1206    }
1207    close(SRC);
1208  } else {
1209    # --raw/http: For everything to work correctly for non-remote profiles, we
1210    # would need to extend PrintProfileData() to handle all possible profile
1211    # types, re-enable the code that is currently disabled in ReadCPUProfile()
1212    # and FixCallerAddresses(), and remove the remote profile dumping code in
1213    # the block above.
1214    die "--raw/http: jeprof can only dump remote profiles for --raw\n";
1215    # dump a cpu-format profile to standard out
1216    PrintProfileData($profile);
1217  }
1218}
1219
1220# Print text output
1221sub PrintText {
1222  my $symbols = shift;
1223  my $flat = shift;
1224  my $cumulative = shift;
1225  my $line_limit = shift;
1226
1227  my $total = TotalProfile($flat);
1228
1229  # Which profile to sort by?
1230  my $s = $main::opt_cum ? $cumulative : $flat;
1231
1232  my $running_sum = 0;
1233  my $lines = 0;
1234  foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b }
1235                 keys(%{$cumulative})) {
1236    my $f = GetEntry($flat, $k);
1237    my $c = GetEntry($cumulative, $k);
1238    $running_sum += $f;
1239
1240    my $sym = $k;
1241    if (exists($symbols->{$k})) {
1242      $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1];
1243      if ($main::opt_addresses) {
1244        $sym = $k . " " . $sym;
1245      }
1246    }
1247
1248    if ($f != 0 || $c != 0) {
1249      printf("%8s %6s %6s %8s %6s %s\n",
1250             Unparse($f),
1251             Percent($f, $total),
1252             Percent($running_sum, $total),
1253             Unparse($c),
1254             Percent($c, $total),
1255             $sym);
1256    }
1257    $lines++;
1258    last if ($line_limit >= 0 && $lines >= $line_limit);
1259  }
1260}
1261
1262# Callgrind format has a compression for repeated function and file
1263# names.  You show the name the first time, and just use its number
1264# subsequently.  This can cut down the file to about a third or a
1265# quarter of its uncompressed size.  $key and $val are the key/value
1266# pair that would normally be printed by callgrind; $map is a map from
1267# value to number.
1268sub CompressedCGName {
1269  my($key, $val, $map) = @_;
1270  my $idx = $map->{$val};
1271  # For very short keys, providing an index hurts rather than helps.
1272  if (length($val) <= 3) {
1273    return "$key=$val\n";
1274  } elsif (defined($idx)) {
1275    return "$key=($idx)\n";
1276  } else {
1277    # scalar(keys $map) gives the number of items in the map.
1278    $idx = scalar(keys(%{$map})) + 1;
1279    $map->{$val} = $idx;
1280    return "$key=($idx) $val\n";
1281  }
1282}
1283
1284# Print the call graph in a way that's suiteable for callgrind.
1285sub PrintCallgrind {
1286  my $calls = shift;
1287  my $filename;
1288  my %filename_to_index_map;
1289  my %fnname_to_index_map;
1290
1291  if ($main::opt_interactive) {
1292    $filename = shift;
1293    print STDERR "Writing callgrind file to '$filename'.\n"
1294  } else {
1295    $filename = "&STDOUT";
1296  }
1297  open(CG, ">$filename");
1298  printf CG ("events: Hits\n\n");
1299  foreach my $call ( map { $_->[0] }
1300                     sort { $a->[1] cmp $b ->[1] ||
1301                            $a->[2] <=> $b->[2] }
1302                     map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
1303                           [$_, $1, $2] }
1304                     keys %$calls ) {
1305    my $count = int($calls->{$call});
1306    $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/;
1307    my ( $caller_file, $caller_line, $caller_function,
1308         $callee_file, $callee_line, $callee_function ) =
1309       ( $1, $2, $3, $5, $6, $7 );
1310
1311    # TODO(csilvers): for better compression, collect all the
1312    # caller/callee_files and functions first, before printing
1313    # anything, and only compress those referenced more than once.
1314    printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map);
1315    printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map);
1316    if (defined $6) {
1317      printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map);
1318      printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map);
1319      printf CG ("calls=$count $callee_line\n");
1320    }
1321    printf CG ("$caller_line $count\n\n");
1322  }
1323}
1324
1325# Print disassembly for all all routines that match $main::opt_disasm
1326sub PrintDisassembly {
1327  my $libs = shift;
1328  my $flat = shift;
1329  my $cumulative = shift;
1330  my $disasm_opts = shift;
1331
1332  my $total = TotalProfile($flat);
1333
1334  foreach my $lib (@{$libs}) {
1335    my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts);
1336    my $offset = AddressSub($lib->[1], $lib->[3]);
1337    foreach my $routine (sort ByName keys(%{$symbol_table})) {
1338      my $start_addr = $symbol_table->{$routine}->[0];
1339      my $end_addr = $symbol_table->{$routine}->[1];
1340      # See if there are any samples in this routine
1341      my $length = hex(AddressSub($end_addr, $start_addr));
1342      my $addr = AddressAdd($start_addr, $offset);
1343      for (my $i = 0; $i < $length; $i++) {
1344        if (defined($cumulative->{$addr})) {
1345          PrintDisassembledFunction($lib->[0], $offset,
1346                                    $routine, $flat, $cumulative,
1347                                    $start_addr, $end_addr, $total);
1348          last;
1349        }
1350        $addr = AddressInc($addr);
1351      }
1352    }
1353  }
1354}
1355
1356# Return reference to array of tuples of the form:
1357#       [start_address, filename, linenumber, instruction, limit_address]
1358# E.g.,
1359#       ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"]
1360sub Disassemble {
1361  my $prog = shift;
1362  my $offset = shift;
1363  my $start_addr = shift;
1364  my $end_addr = shift;
1365
1366  my $objdump = $obj_tool_map{"objdump"};
1367  my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn",
1368                        "--start-address=0x$start_addr",
1369                        "--stop-address=0x$end_addr", $prog);
1370  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
1371  my @result = ();
1372  my $filename = "";
1373  my $linenumber = -1;
1374  my $last = ["", "", "", ""];
1375  while (<OBJDUMP>) {
1376    s/\r//g;         # turn windows-looking lines into unix-looking lines
1377    chop;
1378    if (m|\s*([^:\s]+):(\d+)\s*$|) {
1379      # Location line of the form:
1380      #   <filename>:<linenumber>
1381      $filename = $1;
1382      $linenumber = $2;
1383    } elsif (m/^ +([0-9a-f]+):\s*(.*)/) {
1384      # Disassembly line -- zero-extend address to full length
1385      my $addr = HexExtend($1);
1386      my $k = AddressAdd($addr, $offset);
1387      $last->[4] = $k;   # Store ending address for previous instruction
1388      $last = [$k, $filename, $linenumber, $2, $end_addr];
1389      push(@result, $last);
1390    }
1391  }
1392  close(OBJDUMP);
1393  return @result;
1394}
1395
1396# The input file should contain lines of the form /proc/maps-like
1397# output (same format as expected from the profiles) or that looks
1398# like hex addresses (like "0xDEADBEEF").  We will parse all
1399# /proc/maps output, and for all the hex addresses, we will output
1400# "short" symbol names, one per line, in the same order as the input.
1401sub PrintSymbols {
1402  my $maps_and_symbols_file = shift;
1403
1404  # ParseLibraries expects pcs to be in a set.  Fine by us...
1405  my @pclist = ();   # pcs in sorted order
1406  my $pcs = {};
1407  my $map = "";
1408  foreach my $line (<$maps_and_symbols_file>) {
1409    $line =~ s/\r//g;    # turn windows-looking lines into unix-looking lines
1410    if ($line =~ /\b(0x[0-9a-f]+)\b/i) {
1411      push(@pclist, HexExtend($1));
1412      $pcs->{$pclist[-1]} = 1;
1413    } else {
1414      $map .= $line;
1415    }
1416  }
1417
1418  my $libs = ParseLibraries($main::prog, $map, $pcs);
1419  my $symbols = ExtractSymbols($libs, $pcs);
1420
1421  foreach my $pc (@pclist) {
1422    # ->[0] is the shortname, ->[2] is the full name
1423    print(($symbols->{$pc}->[0] || "??") . "\n");
1424  }
1425}
1426
1427
1428# For sorting functions by name
1429sub ByName {
1430  return ShortFunctionName($a) cmp ShortFunctionName($b);
1431}
1432
1433# Print source-listing for all all routines that match $list_opts
1434sub PrintListing {
1435  my $total = shift;
1436  my $libs = shift;
1437  my $flat = shift;
1438  my $cumulative = shift;
1439  my $list_opts = shift;
1440  my $html = shift;
1441
1442  my $output = \*STDOUT;
1443  my $fname = "";
1444
1445  if ($html) {
1446    # Arrange to write the output to a temporary file
1447    $fname = TempName($main::next_tmpfile, "html");
1448    $main::next_tmpfile++;
1449    if (!open(TEMP, ">$fname")) {
1450      print STDERR "$fname: $!\n";
1451      return;
1452    }
1453    $output = \*TEMP;
1454    print $output HtmlListingHeader();
1455    printf $output ("<div class=\"legend\">%s<br>Total: %s %s</div>\n",
1456                    $main::prog, Unparse($total), Units());
1457  }
1458
1459  my $listed = 0;
1460  foreach my $lib (@{$libs}) {
1461    my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts);
1462    my $offset = AddressSub($lib->[1], $lib->[3]);
1463    foreach my $routine (sort ByName keys(%{$symbol_table})) {
1464      # Print if there are any samples in this routine
1465      my $start_addr = $symbol_table->{$routine}->[0];
1466      my $end_addr = $symbol_table->{$routine}->[1];
1467      my $length = hex(AddressSub($end_addr, $start_addr));
1468      my $addr = AddressAdd($start_addr, $offset);
1469      for (my $i = 0; $i < $length; $i++) {
1470        if (defined($cumulative->{$addr})) {
1471          $listed += PrintSource(
1472            $lib->[0], $offset,
1473            $routine, $flat, $cumulative,
1474            $start_addr, $end_addr,
1475            $html,
1476            $output);
1477          last;
1478        }
1479        $addr = AddressInc($addr);
1480      }
1481    }
1482  }
1483
1484  if ($html) {
1485    if ($listed > 0) {
1486      print $output HtmlListingFooter();
1487      close($output);
1488      RunWeb($fname);
1489    } else {
1490      close($output);
1491      unlink($fname);
1492    }
1493  }
1494}
1495
1496sub HtmlListingHeader {
1497  return <<'EOF';
1498<DOCTYPE html>
1499<html>
1500<head>
1501<title>Pprof listing</title>
1502<style type="text/css">
1503body {
1504  font-family: sans-serif;
1505}
1506h1 {
1507  font-size: 1.5em;
1508  margin-bottom: 4px;
1509}
1510.legend {
1511  font-size: 1.25em;
1512}
1513.line {
1514  color: #aaaaaa;
1515}
1516.nop {
1517  color: #aaaaaa;
1518}
1519.unimportant {
1520  color: #cccccc;
1521}
1522.disasmloc {
1523  color: #000000;
1524}
1525.deadsrc {
1526  cursor: pointer;
1527}
1528.deadsrc:hover {
1529  background-color: #eeeeee;
1530}
1531.livesrc {
1532  color: #0000ff;
1533  cursor: pointer;
1534}
1535.livesrc:hover {
1536  background-color: #eeeeee;
1537}
1538.asm {
1539  color: #008800;
1540  display: none;
1541}
1542</style>
1543<script type="text/javascript">
1544function jeprof_toggle_asm(e) {
1545  var target;
1546  if (!e) e = window.event;
1547  if (e.target) target = e.target;
1548  else if (e.srcElement) target = e.srcElement;
1549
1550  if (target) {
1551    var asm = target.nextSibling;
1552    if (asm && asm.className == "asm") {
1553      asm.style.display = (asm.style.display == "block" ? "" : "block");
1554      e.preventDefault();
1555      return false;
1556    }
1557  }
1558}
1559</script>
1560</head>
1561<body>
1562EOF
1563}
1564
1565sub HtmlListingFooter {
1566  return <<'EOF';
1567</body>
1568</html>
1569EOF
1570}
1571
1572sub HtmlEscape {
1573  my $text = shift;
1574  $text =~ s/&/&amp;/g;
1575  $text =~ s/</&lt;/g;
1576  $text =~ s/>/&gt;/g;
1577  return $text;
1578}
1579
1580# Returns the indentation of the line, if it has any non-whitespace
1581# characters.  Otherwise, returns -1.
1582sub Indentation {
1583  my $line = shift;
1584  if (m/^(\s*)\S/) {
1585    return length($1);
1586  } else {
1587    return -1;
1588  }
1589}
1590
1591# If the symbol table contains inlining info, Disassemble() may tag an
1592# instruction with a location inside an inlined function.  But for
1593# source listings, we prefer to use the location in the function we
1594# are listing.  So use MapToSymbols() to fetch full location
1595# information for each instruction and then pick out the first
1596# location from a location list (location list contains callers before
1597# callees in case of inlining).
1598#
1599# After this routine has run, each entry in $instructions contains:
1600#   [0] start address
1601#   [1] filename for function we are listing
1602#   [2] line number for function we are listing
1603#   [3] disassembly
1604#   [4] limit address
1605#   [5] most specific filename (may be different from [1] due to inlining)
1606#   [6] most specific line number (may be different from [2] due to inlining)
1607sub GetTopLevelLineNumbers {
1608  my ($lib, $offset, $instructions) = @_;
1609  my $pcs = [];
1610  for (my $i = 0; $i <= $#{$instructions}; $i++) {
1611    push(@{$pcs}, $instructions->[$i]->[0]);
1612  }
1613  my $symbols = {};
1614  MapToSymbols($lib, $offset, $pcs, $symbols);
1615  for (my $i = 0; $i <= $#{$instructions}; $i++) {
1616    my $e = $instructions->[$i];
1617    push(@{$e}, $e->[1]);
1618    push(@{$e}, $e->[2]);
1619    my $addr = $e->[0];
1620    my $sym = $symbols->{$addr};
1621    if (defined($sym)) {
1622      if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) {
1623        $e->[1] = $1;  # File name
1624        $e->[2] = $2;  # Line number
1625      }
1626    }
1627  }
1628}
1629
1630# Print source-listing for one routine
1631sub PrintSource {
1632  my $prog = shift;
1633  my $offset = shift;
1634  my $routine = shift;
1635  my $flat = shift;
1636  my $cumulative = shift;
1637  my $start_addr = shift;
1638  my $end_addr = shift;
1639  my $html = shift;
1640  my $output = shift;
1641
1642  # Disassemble all instructions (just to get line numbers)
1643  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
1644  GetTopLevelLineNumbers($prog, $offset, \@instructions);
1645
1646  # Hack 1: assume that the first source file encountered in the
1647  # disassembly contains the routine
1648  my $filename = undef;
1649  for (my $i = 0; $i <= $#instructions; $i++) {
1650    if ($instructions[$i]->[2] >= 0) {
1651      $filename = $instructions[$i]->[1];
1652      last;
1653    }
1654  }
1655  if (!defined($filename)) {
1656    print STDERR "no filename found in $routine\n";
1657    return 0;
1658  }
1659
1660  # Hack 2: assume that the largest line number from $filename is the
1661  # end of the procedure.  This is typically safe since if P1 contains
1662  # an inlined call to P2, then P2 usually occurs earlier in the
1663  # source file.  If this does not work, we might have to compute a
1664  # density profile or just print all regions we find.
1665  my $lastline = 0;
1666  for (my $i = 0; $i <= $#instructions; $i++) {
1667    my $f = $instructions[$i]->[1];
1668    my $l = $instructions[$i]->[2];
1669    if (($f eq $filename) && ($l > $lastline)) {
1670      $lastline = $l;
1671    }
1672  }
1673
1674  # Hack 3: assume the first source location from "filename" is the start of
1675  # the source code.
1676  my $firstline = 1;
1677  for (my $i = 0; $i <= $#instructions; $i++) {
1678    if ($instructions[$i]->[1] eq $filename) {
1679      $firstline = $instructions[$i]->[2];
1680      last;
1681    }
1682  }
1683
1684  # Hack 4: Extend last line forward until its indentation is less than
1685  # the indentation we saw on $firstline
1686  my $oldlastline = $lastline;
1687  {
1688    if (!open(FILE, "<$filename")) {
1689      print STDERR "$filename: $!\n";
1690      return 0;
1691    }
1692    my $l = 0;
1693    my $first_indentation = -1;
1694    while (<FILE>) {
1695      s/\r//g;         # turn windows-looking lines into unix-looking lines
1696      $l++;
1697      my $indent = Indentation($_);
1698      if ($l >= $firstline) {
1699        if ($first_indentation < 0 && $indent >= 0) {
1700          $first_indentation = $indent;
1701          last if ($first_indentation == 0);
1702        }
1703      }
1704      if ($l >= $lastline && $indent >= 0) {
1705        if ($indent >= $first_indentation) {
1706          $lastline = $l+1;
1707        } else {
1708          last;
1709        }
1710      }
1711    }
1712    close(FILE);
1713  }
1714
1715  # Assign all samples to the range $firstline,$lastline,
1716  # Hack 4: If an instruction does not occur in the range, its samples
1717  # are moved to the next instruction that occurs in the range.
1718  my $samples1 = {};        # Map from line number to flat count
1719  my $samples2 = {};        # Map from line number to cumulative count
1720  my $running1 = 0;         # Unassigned flat counts
1721  my $running2 = 0;         # Unassigned cumulative counts
1722  my $total1 = 0;           # Total flat counts
1723  my $total2 = 0;           # Total cumulative counts
1724  my %disasm = ();          # Map from line number to disassembly
1725  my $running_disasm = "";  # Unassigned disassembly
1726  my $skip_marker = "---\n";
1727  if ($html) {
1728    $skip_marker = "";
1729    for (my $l = $firstline; $l <= $lastline; $l++) {
1730      $disasm{$l} = "";
1731    }
1732  }
1733  my $last_dis_filename = '';
1734  my $last_dis_linenum = -1;
1735  my $last_touched_line = -1;  # To detect gaps in disassembly for a line
1736  foreach my $e (@instructions) {
1737    # Add up counts for all address that fall inside this instruction
1738    my $c1 = 0;
1739    my $c2 = 0;
1740    for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
1741      $c1 += GetEntry($flat, $a);
1742      $c2 += GetEntry($cumulative, $a);
1743    }
1744
1745    if ($html) {
1746      my $dis = sprintf("      %6s %6s \t\t%8s: %s ",
1747                        HtmlPrintNumber($c1),
1748                        HtmlPrintNumber($c2),
1749                        UnparseAddress($offset, $e->[0]),
1750                        CleanDisassembly($e->[3]));
1751
1752      # Append the most specific source line associated with this instruction
1753      if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) };
1754      $dis = HtmlEscape($dis);
1755      my $f = $e->[5];
1756      my $l = $e->[6];
1757      if ($f ne $last_dis_filename) {
1758        $dis .= sprintf("<span class=disasmloc>%s:%d</span>",
1759                        HtmlEscape(CleanFileName($f)), $l);
1760      } elsif ($l ne $last_dis_linenum) {
1761        # De-emphasize the unchanged file name portion
1762        $dis .= sprintf("<span class=unimportant>%s</span>" .
1763                        "<span class=disasmloc>:%d</span>",
1764                        HtmlEscape(CleanFileName($f)), $l);
1765      } else {
1766        # De-emphasize the entire location
1767        $dis .= sprintf("<span class=unimportant>%s:%d</span>",
1768                        HtmlEscape(CleanFileName($f)), $l);
1769      }
1770      $last_dis_filename = $f;
1771      $last_dis_linenum = $l;
1772      $running_disasm .= $dis;
1773      $running_disasm .= "\n";
1774    }
1775
1776    $running1 += $c1;
1777    $running2 += $c2;
1778    $total1 += $c1;
1779    $total2 += $c2;
1780    my $file = $e->[1];
1781    my $line = $e->[2];
1782    if (($file eq $filename) &&
1783        ($line >= $firstline) &&
1784        ($line <= $lastline)) {
1785      # Assign all accumulated samples to this line
1786      AddEntry($samples1, $line, $running1);
1787      AddEntry($samples2, $line, $running2);
1788      $running1 = 0;
1789      $running2 = 0;
1790      if ($html) {
1791        if ($line != $last_touched_line && $disasm{$line} ne '') {
1792          $disasm{$line} .= "\n";
1793        }
1794        $disasm{$line} .= $running_disasm;
1795        $running_disasm = '';
1796        $last_touched_line = $line;
1797      }
1798    }
1799  }
1800
1801  # Assign any leftover samples to $lastline
1802  AddEntry($samples1, $lastline, $running1);
1803  AddEntry($samples2, $lastline, $running2);
1804  if ($html) {
1805    if ($lastline != $last_touched_line && $disasm{$lastline} ne '') {
1806      $disasm{$lastline} .= "\n";
1807    }
1808    $disasm{$lastline} .= $running_disasm;
1809  }
1810
1811  if ($html) {
1812    printf $output (
1813      "<h1>%s</h1>%s\n<pre onClick=\"jeprof_toggle_asm()\">\n" .
1814      "Total:%6s %6s (flat / cumulative %s)\n",
1815      HtmlEscape(ShortFunctionName($routine)),
1816      HtmlEscape(CleanFileName($filename)),
1817      Unparse($total1),
1818      Unparse($total2),
1819      Units());
1820  } else {
1821    printf $output (
1822      "ROUTINE ====================== %s in %s\n" .
1823      "%6s %6s Total %s (flat / cumulative)\n",
1824      ShortFunctionName($routine),
1825      CleanFileName($filename),
1826      Unparse($total1),
1827      Unparse($total2),
1828      Units());
1829  }
1830  if (!open(FILE, "<$filename")) {
1831    print STDERR "$filename: $!\n";
1832    return 0;
1833  }
1834  my $l = 0;
1835  while (<FILE>) {
1836    s/\r//g;         # turn windows-looking lines into unix-looking lines
1837    $l++;
1838    if ($l >= $firstline - 5 &&
1839        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
1840      chop;
1841      my $text = $_;
1842      if ($l == $firstline) { print $output $skip_marker; }
1843      my $n1 = GetEntry($samples1, $l);
1844      my $n2 = GetEntry($samples2, $l);
1845      if ($html) {
1846        # Emit a span that has one of the following classes:
1847        #    livesrc -- has samples
1848        #    deadsrc -- has disassembly, but with no samples
1849        #    nop     -- has no matching disasembly
1850        # Also emit an optional span containing disassembly.
1851        my $dis = $disasm{$l};
1852        my $asm = "";
1853        if (defined($dis) && $dis ne '') {
1854          $asm = "<span class=\"asm\">" . $dis . "</span>";
1855        }
1856        my $source_class = (($n1 + $n2 > 0)
1857                            ? "livesrc"
1858                            : (($asm ne "") ? "deadsrc" : "nop"));
1859        printf $output (
1860          "<span class=\"line\">%5d</span> " .
1861          "<span class=\"%s\">%6s %6s %s</span>%s\n",
1862          $l, $source_class,
1863          HtmlPrintNumber($n1),
1864          HtmlPrintNumber($n2),
1865          HtmlEscape($text),
1866          $asm);
1867      } else {
1868        printf $output(
1869          "%6s %6s %4d: %s\n",
1870          UnparseAlt($n1),
1871          UnparseAlt($n2),
1872          $l,
1873          $text);
1874      }
1875      if ($l == $lastline)  { print $output $skip_marker; }
1876    };
1877  }
1878  close(FILE);
1879  if ($html) {
1880    print $output "</pre>\n";
1881  }
1882  return 1;
1883}
1884
1885# Return the source line for the specified file/linenumber.
1886# Returns undef if not found.
1887sub SourceLine {
1888  my $file = shift;
1889  my $line = shift;
1890
1891  # Look in cache
1892  if (!defined($main::source_cache{$file})) {
1893    if (100 < scalar keys(%main::source_cache)) {
1894      # Clear the cache when it gets too big
1895      $main::source_cache = ();
1896    }
1897
1898    # Read all lines from the file
1899    if (!open(FILE, "<$file")) {
1900      print STDERR "$file: $!\n";
1901      $main::source_cache{$file} = [];  # Cache the negative result
1902      return undef;
1903    }
1904    my $lines = [];
1905    push(@{$lines}, "");        # So we can use 1-based line numbers as indices
1906    while (<FILE>) {
1907      push(@{$lines}, $_);
1908    }
1909    close(FILE);
1910
1911    # Save the lines in the cache
1912    $main::source_cache{$file} = $lines;
1913  }
1914
1915  my $lines = $main::source_cache{$file};
1916  if (($line < 0) || ($line > $#{$lines})) {
1917    return undef;
1918  } else {
1919    return $lines->[$line];
1920  }
1921}
1922
1923# Print disassembly for one routine with interspersed source if available
1924sub PrintDisassembledFunction {
1925  my $prog = shift;
1926  my $offset = shift;
1927  my $routine = shift;
1928  my $flat = shift;
1929  my $cumulative = shift;
1930  my $start_addr = shift;
1931  my $end_addr = shift;
1932  my $total = shift;
1933
1934  # Disassemble all instructions
1935  my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr);
1936
1937  # Make array of counts per instruction
1938  my @flat_count = ();
1939  my @cum_count = ();
1940  my $flat_total = 0;
1941  my $cum_total = 0;
1942  foreach my $e (@instructions) {
1943    # Add up counts for all address that fall inside this instruction
1944    my $c1 = 0;
1945    my $c2 = 0;
1946    for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) {
1947      $c1 += GetEntry($flat, $a);
1948      $c2 += GetEntry($cumulative, $a);
1949    }
1950    push(@flat_count, $c1);
1951    push(@cum_count, $c2);
1952    $flat_total += $c1;
1953    $cum_total += $c2;
1954  }
1955
1956  # Print header with total counts
1957  printf("ROUTINE ====================== %s\n" .
1958         "%6s %6s %s (flat, cumulative) %.1f%% of total\n",
1959         ShortFunctionName($routine),
1960         Unparse($flat_total),
1961         Unparse($cum_total),
1962         Units(),
1963         ($cum_total * 100.0) / $total);
1964
1965  # Process instructions in order
1966  my $current_file = "";
1967  for (my $i = 0; $i <= $#instructions; ) {
1968    my $e = $instructions[$i];
1969
1970    # Print the new file name whenever we switch files
1971    if ($e->[1] ne $current_file) {
1972      $current_file = $e->[1];
1973      my $fname = $current_file;
1974      $fname =~ s|^\./||;   # Trim leading "./"
1975
1976      # Shorten long file names
1977      if (length($fname) >= 58) {
1978        $fname = "..." . substr($fname, -55);
1979      }
1980      printf("-------------------- %s\n", $fname);
1981    }
1982
1983    # TODO: Compute range of lines to print together to deal with
1984    # small reorderings.
1985    my $first_line = $e->[2];
1986    my $last_line = $first_line;
1987    my %flat_sum = ();
1988    my %cum_sum = ();
1989    for (my $l = $first_line; $l <= $last_line; $l++) {
1990      $flat_sum{$l} = 0;
1991      $cum_sum{$l} = 0;
1992    }
1993
1994    # Find run of instructions for this range of source lines
1995    my $first_inst = $i;
1996    while (($i <= $#instructions) &&
1997           ($instructions[$i]->[2] >= $first_line) &&
1998           ($instructions[$i]->[2] <= $last_line)) {
1999      $e = $instructions[$i];
2000      $flat_sum{$e->[2]} += $flat_count[$i];
2001      $cum_sum{$e->[2]} += $cum_count[$i];
2002      $i++;
2003    }
2004    my $last_inst = $i - 1;
2005
2006    # Print source lines
2007    for (my $l = $first_line; $l <= $last_line; $l++) {
2008      my $line = SourceLine($current_file, $l);
2009      if (!defined($line)) {
2010        $line = "?\n";
2011        next;
2012      } else {
2013        $line =~ s/^\s+//;
2014      }
2015      printf("%6s %6s %5d: %s",
2016             UnparseAlt($flat_sum{$l}),
2017             UnparseAlt($cum_sum{$l}),
2018             $l,
2019             $line);
2020    }
2021
2022    # Print disassembly
2023    for (my $x = $first_inst; $x <= $last_inst; $x++) {
2024      my $e = $instructions[$x];
2025      printf("%6s %6s    %8s: %6s\n",
2026             UnparseAlt($flat_count[$x]),
2027             UnparseAlt($cum_count[$x]),
2028             UnparseAddress($offset, $e->[0]),
2029             CleanDisassembly($e->[3]));
2030    }
2031  }
2032}
2033
2034# Print DOT graph
2035sub PrintDot {
2036  my $prog = shift;
2037  my $symbols = shift;
2038  my $raw = shift;
2039  my $flat = shift;
2040  my $cumulative = shift;
2041  my $overall_total = shift;
2042
2043  # Get total
2044  my $local_total = TotalProfile($flat);
2045  my $nodelimit = int($main::opt_nodefraction * $local_total);
2046  my $edgelimit = int($main::opt_edgefraction * $local_total);
2047  my $nodecount = $main::opt_nodecount;
2048
2049  # Find nodes to include
2050  my @list = (sort { abs(GetEntry($cumulative, $b)) <=>
2051                     abs(GetEntry($cumulative, $a))
2052                     || $a cmp $b }
2053              keys(%{$cumulative}));
2054  my $last = $nodecount - 1;
2055  if ($last > $#list) {
2056    $last = $#list;
2057  }
2058  while (($last >= 0) &&
2059         (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) {
2060    $last--;
2061  }
2062  if ($last < 0) {
2063    print STDERR "No nodes to print\n";
2064    return 0;
2065  }
2066
2067  if ($nodelimit > 0 || $edgelimit > 0) {
2068    printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n",
2069                   Unparse($nodelimit), Units(),
2070                   Unparse($edgelimit), Units());
2071  }
2072
2073  # Open DOT output file
2074  my $output;
2075  my $escaped_dot = ShellEscape(@DOT);
2076  my $escaped_ps2pdf = ShellEscape(@PS2PDF);
2077  if ($main::opt_gv) {
2078    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps"));
2079    $output = "| $escaped_dot -Tps2 >$escaped_outfile";
2080  } elsif ($main::opt_evince) {
2081    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf"));
2082    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile";
2083  } elsif ($main::opt_ps) {
2084    $output = "| $escaped_dot -Tps2";
2085  } elsif ($main::opt_pdf) {
2086    $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -";
2087  } elsif ($main::opt_web || $main::opt_svg) {
2088    # We need to post-process the SVG, so write to a temporary file always.
2089    my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg"));
2090    $output = "| $escaped_dot -Tsvg >$escaped_outfile";
2091  } elsif ($main::opt_gif) {
2092    $output = "| $escaped_dot -Tgif";
2093  } else {
2094    $output = ">&STDOUT";
2095  }
2096  open(DOT, $output) || error("$output: $!\n");
2097
2098  # Title
2099  printf DOT ("digraph \"%s; %s %s\" {\n",
2100              $prog,
2101              Unparse($overall_total),
2102              Units());
2103  if ($main::opt_pdf) {
2104    # The output is more printable if we set the page size for dot.
2105    printf DOT ("size=\"8,11\"\n");
2106  }
2107  printf DOT ("node [width=0.375,height=0.25];\n");
2108
2109  # Print legend
2110  printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," .
2111              "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n",
2112              $prog,
2113              sprintf("Total %s: %s", Units(), Unparse($overall_total)),
2114              sprintf("Focusing on: %s", Unparse($local_total)),
2115              sprintf("Dropped nodes with <= %s abs(%s)",
2116                      Unparse($nodelimit), Units()),
2117              sprintf("Dropped edges with <= %s %s",
2118                      Unparse($edgelimit), Units())
2119              );
2120
2121  # Print nodes
2122  my %node = ();
2123  my $nextnode = 1;
2124  foreach my $a (@list[0..$last]) {
2125    # Pick font size
2126    my $f = GetEntry($flat, $a);
2127    my $c = GetEntry($cumulative, $a);
2128
2129    my $fs = 8;
2130    if ($local_total > 0) {
2131      $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total)));
2132    }
2133
2134    $node{$a} = $nextnode++;
2135    my $sym = $a;
2136    $sym =~ s/\s+/\\n/g;
2137    $sym =~ s/::/\\n/g;
2138
2139    # Extra cumulative info to print for non-leaves
2140    my $extra = "";
2141    if ($f != $c) {
2142      $extra = sprintf("\\rof %s (%s)",
2143                       Unparse($c),
2144                       Percent($c, $local_total));
2145    }
2146    my $style = "";
2147    if ($main::opt_heapcheck) {
2148      if ($f > 0) {
2149        # make leak-causing nodes more visible (add a background)
2150        $style = ",style=filled,fillcolor=gray"
2151      } elsif ($f < 0) {
2152        # make anti-leak-causing nodes (which almost never occur)
2153        # stand out as well (triple border)
2154        $style = ",peripheries=3"
2155      }
2156    }
2157
2158    printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" .
2159                "\",shape=box,fontsize=%.1f%s];\n",
2160                $node{$a},
2161                $sym,
2162                Unparse($f),
2163                Percent($f, $local_total),
2164                $extra,
2165                $fs,
2166                $style,
2167               );
2168  }
2169
2170  # Get edges and counts per edge
2171  my %edge = ();
2172  my $n;
2173  my $fullname_to_shortname_map = {};
2174  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
2175  foreach my $k (keys(%{$raw})) {
2176    # TODO: omit low %age edges
2177    $n = $raw->{$k};
2178    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
2179    for (my $i = 1; $i <= $#translated; $i++) {
2180      my $src = $translated[$i];
2181      my $dst = $translated[$i-1];
2182      #next if ($src eq $dst);  # Avoid self-edges?
2183      if (exists($node{$src}) && exists($node{$dst})) {
2184        my $edge_label = "$src\001$dst";
2185        if (!exists($edge{$edge_label})) {
2186          $edge{$edge_label} = 0;
2187        }
2188        $edge{$edge_label} += $n;
2189      }
2190    }
2191  }
2192
2193  # Print edges (process in order of decreasing counts)
2194  my %indegree = ();   # Number of incoming edges added per node so far
2195  my %outdegree = ();  # Number of outgoing edges added per node so far
2196  foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) {
2197    my @x = split(/\001/, $e);
2198    $n = $edge{$e};
2199
2200    # Initialize degree of kept incoming and outgoing edges if necessary
2201    my $src = $x[0];
2202    my $dst = $x[1];
2203    if (!exists($outdegree{$src})) { $outdegree{$src} = 0; }
2204    if (!exists($indegree{$dst})) { $indegree{$dst} = 0; }
2205
2206    my $keep;
2207    if ($indegree{$dst} == 0) {
2208      # Keep edge if needed for reachability
2209      $keep = 1;
2210    } elsif (abs($n) <= $edgelimit) {
2211      # Drop if we are below --edgefraction
2212      $keep = 0;
2213    } elsif ($outdegree{$src} >= $main::opt_maxdegree ||
2214             $indegree{$dst} >= $main::opt_maxdegree) {
2215      # Keep limited number of in/out edges per node
2216      $keep = 0;
2217    } else {
2218      $keep = 1;
2219    }
2220
2221    if ($keep) {
2222      $outdegree{$src}++;
2223      $indegree{$dst}++;
2224
2225      # Compute line width based on edge count
2226      my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0);
2227      if ($fraction > 1) { $fraction = 1; }
2228      my $w = $fraction * 2;
2229      if ($w < 1 && ($main::opt_web || $main::opt_svg)) {
2230        # SVG output treats line widths < 1 poorly.
2231        $w = 1;
2232      }
2233
2234      # Dot sometimes segfaults if given edge weights that are too large, so
2235      # we cap the weights at a large value
2236      my $edgeweight = abs($n) ** 0.7;
2237      if ($edgeweight > 100000) { $edgeweight = 100000; }
2238      $edgeweight = int($edgeweight);
2239
2240      my $style = sprintf("setlinewidth(%f)", $w);
2241      if ($x[1] =~ m/\(inline\)/) {
2242        $style .= ",dashed";
2243      }
2244
2245      # Use a slightly squashed function of the edge count as the weight
2246      printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n",
2247                  $node{$x[0]},
2248                  $node{$x[1]},
2249                  Unparse($n),
2250                  $edgeweight,
2251                  $style);
2252    }
2253  }
2254
2255  print DOT ("}\n");
2256  close(DOT);
2257
2258  if ($main::opt_web || $main::opt_svg) {
2259    # Rewrite SVG to be more usable inside web browser.
2260    RewriteSvg(TempName($main::next_tmpfile, "svg"));
2261  }
2262
2263  return 1;
2264}
2265
2266sub RewriteSvg {
2267  my $svgfile = shift;
2268
2269  open(SVG, $svgfile) || die "open temp svg: $!";
2270  my @svg = <SVG>;
2271  close(SVG);
2272  unlink $svgfile;
2273  my $svg = join('', @svg);
2274
2275  # Dot's SVG output is
2276  #
2277  #    <svg width="___" height="___"
2278  #     viewBox="___" xmlns=...>
2279  #    <g id="graph0" transform="...">
2280  #    ...
2281  #    </g>
2282  #    </svg>
2283  #
2284  # Change it to
2285  #
2286  #    <svg width="100%" height="100%"
2287  #     xmlns=...>
2288  #    $svg_javascript
2289  #    <g id="viewport" transform="translate(0,0)">
2290  #    <g id="graph0" transform="...">
2291  #    ...
2292  #    </g>
2293  #    </g>
2294  #    </svg>
2295
2296  # Fix width, height; drop viewBox.
2297  $svg =~ s/(?s)<svg width="[^"]+" height="[^"]+"(.*?)viewBox="[^"]+"/<svg width="100%" height="100%"$1/;
2298
2299  # Insert script, viewport <g> above first <g>
2300  my $svg_javascript = SvgJavascript();
2301  my $viewport = "<g id=\"viewport\" transform=\"translate(0,0)\">\n";
2302  $svg =~ s/<g id="graph\d"/$svg_javascript$viewport$&/;
2303
2304  # Insert final </g> above </svg>.
2305  $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/;
2306  $svg =~ s/<g id="graph\d"(.*?)/<g id="viewport"$1/;
2307
2308  if ($main::opt_svg) {
2309    # --svg: write to standard output.
2310    print $svg;
2311  } else {
2312    # Write back to temporary file.
2313    open(SVG, ">$svgfile") || die "open $svgfile: $!";
2314    print SVG $svg;
2315    close(SVG);
2316  }
2317}
2318
2319sub SvgJavascript {
2320  return <<'EOF';
2321<script type="text/ecmascript"><![CDATA[
2322// SVGPan
2323// http://www.cyberz.org/blog/2009/12/08/svgpan-a-javascript-svg-panzoomdrag-library/
2324// Local modification: if(true || ...) below to force panning, never moving.
2325
2326/**
2327 *  SVGPan library 1.2
2328 * ====================
2329 *
2330 * Given an unique existing element with id "viewport", including the
2331 * the library into any SVG adds the following capabilities:
2332 *
2333 *  - Mouse panning
2334 *  - Mouse zooming (using the wheel)
2335 *  - Object dargging
2336 *
2337 * Known issues:
2338 *
2339 *  - Zooming (while panning) on Safari has still some issues
2340 *
2341 * Releases:
2342 *
2343 * 1.2, Sat Mar 20 08:42:50 GMT 2010, Zeng Xiaohui
2344 *	Fixed a bug with browser mouse handler interaction
2345 *
2346 * 1.1, Wed Feb  3 17:39:33 GMT 2010, Zeng Xiaohui
2347 *	Updated the zoom code to support the mouse wheel on Safari/Chrome
2348 *
2349 * 1.0, Andrea Leofreddi
2350 *	First release
2351 *
2352 * This code is licensed under the following BSD license:
2353 *
2354 * Copyright 2009-2010 Andrea Leofreddi <a.leofreddi@itcharm.com>. All rights reserved.
2355 *
2356 * Redistribution and use in source and binary forms, with or without modification, are
2357 * permitted provided that the following conditions are met:
2358 *
2359 *    1. Redistributions of source code must retain the above copyright notice, this list of
2360 *       conditions and the following disclaimer.
2361 *
2362 *    2. Redistributions in binary form must reproduce the above copyright notice, this list
2363 *       of conditions and the following disclaimer in the documentation and/or other materials
2364 *       provided with the distribution.
2365 *
2366 * THIS SOFTWARE IS PROVIDED BY Andrea Leofreddi ``AS IS'' AND ANY EXPRESS OR IMPLIED
2367 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
2368 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Andrea Leofreddi OR
2369 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2370 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
2371 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
2372 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
2373 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
2374 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2375 *
2376 * The views and conclusions contained in the software and documentation are those of the
2377 * authors and should not be interpreted as representing official policies, either expressed
2378 * or implied, of Andrea Leofreddi.
2379 */
2380
2381var root = document.documentElement;
2382
2383var state = 'none', stateTarget, stateOrigin, stateTf;
2384
2385setupHandlers(root);
2386
2387/**
2388 * Register handlers
2389 */
2390function setupHandlers(root){
2391	setAttributes(root, {
2392		"onmouseup" : "add(evt)",
2393		"onmousedown" : "handleMouseDown(evt)",
2394		"onmousemove" : "handleMouseMove(evt)",
2395		"onmouseup" : "handleMouseUp(evt)",
2396		//"onmouseout" : "handleMouseUp(evt)", // Decomment this to stop the pan functionality when dragging out of the SVG element
2397	});
2398
2399	if(navigator.userAgent.toLowerCase().indexOf('webkit') >= 0)
2400		window.addEventListener('mousewheel', handleMouseWheel, false); // Chrome/Safari
2401	else
2402		window.addEventListener('DOMMouseScroll', handleMouseWheel, false); // Others
2403
2404	var g = svgDoc.getElementById("svg");
2405	g.width = "100%";
2406	g.height = "100%";
2407}
2408
2409/**
2410 * Instance an SVGPoint object with given event coordinates.
2411 */
2412function getEventPoint(evt) {
2413	var p = root.createSVGPoint();
2414
2415	p.x = evt.clientX;
2416	p.y = evt.clientY;
2417
2418	return p;
2419}
2420
2421/**
2422 * Sets the current transform matrix of an element.
2423 */
2424function setCTM(element, matrix) {
2425	var s = "matrix(" + matrix.a + "," + matrix.b + "," + matrix.c + "," + matrix.d + "," + matrix.e + "," + matrix.f + ")";
2426
2427	element.setAttribute("transform", s);
2428}
2429
2430/**
2431 * Dumps a matrix to a string (useful for debug).
2432 */
2433function dumpMatrix(matrix) {
2434	var s = "[ " + matrix.a + ", " + matrix.c + ", " + matrix.e + "\n  " + matrix.b + ", " + matrix.d + ", " + matrix.f + "\n  0, 0, 1 ]";
2435
2436	return s;
2437}
2438
2439/**
2440 * Sets attributes of an element.
2441 */
2442function setAttributes(element, attributes){
2443	for (i in attributes)
2444		element.setAttributeNS(null, i, attributes[i]);
2445}
2446
2447/**
2448 * Handle mouse move event.
2449 */
2450function handleMouseWheel(evt) {
2451	if(evt.preventDefault)
2452		evt.preventDefault();
2453
2454	evt.returnValue = false;
2455
2456	var svgDoc = evt.target.ownerDocument;
2457
2458	var delta;
2459
2460	if(evt.wheelDelta)
2461		delta = evt.wheelDelta / 3600; // Chrome/Safari
2462	else
2463		delta = evt.detail / -90; // Mozilla
2464
2465	var z = 1 + delta; // Zoom factor: 0.9/1.1
2466
2467	var g = svgDoc.getElementById("viewport");
2468
2469	var p = getEventPoint(evt);
2470
2471	p = p.matrixTransform(g.getCTM().inverse());
2472
2473	// Compute new scale matrix in current mouse position
2474	var k = root.createSVGMatrix().translate(p.x, p.y).scale(z).translate(-p.x, -p.y);
2475
2476        setCTM(g, g.getCTM().multiply(k));
2477
2478	stateTf = stateTf.multiply(k.inverse());
2479}
2480
2481/**
2482 * Handle mouse move event.
2483 */
2484function handleMouseMove(evt) {
2485	if(evt.preventDefault)
2486		evt.preventDefault();
2487
2488	evt.returnValue = false;
2489
2490	var svgDoc = evt.target.ownerDocument;
2491
2492	var g = svgDoc.getElementById("viewport");
2493
2494	if(state == 'pan') {
2495		// Pan mode
2496		var p = getEventPoint(evt).matrixTransform(stateTf);
2497
2498		setCTM(g, stateTf.inverse().translate(p.x - stateOrigin.x, p.y - stateOrigin.y));
2499	} else if(state == 'move') {
2500		// Move mode
2501		var p = getEventPoint(evt).matrixTransform(g.getCTM().inverse());
2502
2503		setCTM(stateTarget, root.createSVGMatrix().translate(p.x - stateOrigin.x, p.y - stateOrigin.y).multiply(g.getCTM().inverse()).multiply(stateTarget.getCTM()));
2504
2505		stateOrigin = p;
2506	}
2507}
2508
2509/**
2510 * Handle click event.
2511 */
2512function handleMouseDown(evt) {
2513	if(evt.preventDefault)
2514		evt.preventDefault();
2515
2516	evt.returnValue = false;
2517
2518	var svgDoc = evt.target.ownerDocument;
2519
2520	var g = svgDoc.getElementById("viewport");
2521
2522	if(true || evt.target.tagName == "svg") {
2523		// Pan mode
2524		state = 'pan';
2525
2526		stateTf = g.getCTM().inverse();
2527
2528		stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
2529	} else {
2530		// Move mode
2531		state = 'move';
2532
2533		stateTarget = evt.target;
2534
2535		stateTf = g.getCTM().inverse();
2536
2537		stateOrigin = getEventPoint(evt).matrixTransform(stateTf);
2538	}
2539}
2540
2541/**
2542 * Handle mouse button release event.
2543 */
2544function handleMouseUp(evt) {
2545	if(evt.preventDefault)
2546		evt.preventDefault();
2547
2548	evt.returnValue = false;
2549
2550	var svgDoc = evt.target.ownerDocument;
2551
2552	if(state == 'pan' || state == 'move') {
2553		// Quit pan mode
2554		state = '';
2555	}
2556}
2557
2558]]></script>
2559EOF
2560}
2561
2562# Provides a map from fullname to shortname for cases where the
2563# shortname is ambiguous.  The symlist has both the fullname and
2564# shortname for all symbols, which is usually fine, but sometimes --
2565# such as overloaded functions -- two different fullnames can map to
2566# the same shortname.  In that case, we use the address of the
2567# function to disambiguate the two.  This function fills in a map that
2568# maps fullnames to modified shortnames in such cases.  If a fullname
2569# is not present in the map, the 'normal' shortname provided by the
2570# symlist is the appropriate one to use.
2571sub FillFullnameToShortnameMap {
2572  my $symbols = shift;
2573  my $fullname_to_shortname_map = shift;
2574  my $shortnames_seen_once = {};
2575  my $shortnames_seen_more_than_once = {};
2576
2577  foreach my $symlist (values(%{$symbols})) {
2578    # TODO(csilvers): deal with inlined symbols too.
2579    my $shortname = $symlist->[0];
2580    my $fullname = $symlist->[2];
2581    if ($fullname !~ /<[0-9a-fA-F]+>$/) {  # fullname doesn't end in an address
2582      next;       # the only collisions we care about are when addresses differ
2583    }
2584    if (defined($shortnames_seen_once->{$shortname}) &&
2585        $shortnames_seen_once->{$shortname} ne $fullname) {
2586      $shortnames_seen_more_than_once->{$shortname} = 1;
2587    } else {
2588      $shortnames_seen_once->{$shortname} = $fullname;
2589    }
2590  }
2591
2592  foreach my $symlist (values(%{$symbols})) {
2593    my $shortname = $symlist->[0];
2594    my $fullname = $symlist->[2];
2595    # TODO(csilvers): take in a list of addresses we care about, and only
2596    # store in the map if $symlist->[1] is in that list.  Saves space.
2597    next if defined($fullname_to_shortname_map->{$fullname});
2598    if (defined($shortnames_seen_more_than_once->{$shortname})) {
2599      if ($fullname =~ /<0*([^>]*)>$/) {   # fullname has address at end of it
2600        $fullname_to_shortname_map->{$fullname} = "$shortname\@$1";
2601      }
2602    }
2603  }
2604}
2605
2606# Return a small number that identifies the argument.
2607# Multiple calls with the same argument will return the same number.
2608# Calls with different arguments will return different numbers.
2609sub ShortIdFor {
2610  my $key = shift;
2611  my $id = $main::uniqueid{$key};
2612  if (!defined($id)) {
2613    $id = keys(%main::uniqueid) + 1;
2614    $main::uniqueid{$key} = $id;
2615  }
2616  return $id;
2617}
2618
2619# Translate a stack of addresses into a stack of symbols
2620sub TranslateStack {
2621  my $symbols = shift;
2622  my $fullname_to_shortname_map = shift;
2623  my $k = shift;
2624
2625  my @addrs = split(/\n/, $k);
2626  my @result = ();
2627  for (my $i = 0; $i <= $#addrs; $i++) {
2628    my $a = $addrs[$i];
2629
2630    # Skip large addresses since they sometimes show up as fake entries on RH9
2631    if (length($a) > 8 && $a gt "7fffffffffffffff") {
2632      next;
2633    }
2634
2635    if ($main::opt_disasm || $main::opt_list) {
2636      # We want just the address for the key
2637      push(@result, $a);
2638      next;
2639    }
2640
2641    my $symlist = $symbols->{$a};
2642    if (!defined($symlist)) {
2643      $symlist = [$a, "", $a];
2644    }
2645
2646    # We can have a sequence of symbols for a particular entry
2647    # (more than one symbol in the case of inlining).  Callers
2648    # come before callees in symlist, so walk backwards since
2649    # the translated stack should contain callees before callers.
2650    for (my $j = $#{$symlist}; $j >= 2; $j -= 3) {
2651      my $func = $symlist->[$j-2];
2652      my $fileline = $symlist->[$j-1];
2653      my $fullfunc = $symlist->[$j];
2654      if (defined($fullname_to_shortname_map->{$fullfunc})) {
2655        $func = $fullname_to_shortname_map->{$fullfunc};
2656      }
2657      if ($j > 2) {
2658        $func = "$func (inline)";
2659      }
2660
2661      # Do not merge nodes corresponding to Callback::Run since that
2662      # causes confusing cycles in dot display.  Instead, we synthesize
2663      # a unique name for this frame per caller.
2664      if ($func =~ m/Callback.*::Run$/) {
2665        my $caller = ($i > 0) ? $addrs[$i-1] : 0;
2666        $func = "Run#" . ShortIdFor($caller);
2667      }
2668
2669      if ($main::opt_addresses) {
2670        push(@result, "$a $func $fileline");
2671      } elsif ($main::opt_lines) {
2672        if ($func eq '??' && $fileline eq '??:0') {
2673          push(@result, "$a");
2674        } else {
2675          push(@result, "$func $fileline");
2676        }
2677      } elsif ($main::opt_functions) {
2678        if ($func eq '??') {
2679          push(@result, "$a");
2680        } else {
2681          push(@result, $func);
2682        }
2683      } elsif ($main::opt_files) {
2684        if ($fileline eq '??:0' || $fileline eq '') {
2685          push(@result, "$a");
2686        } else {
2687          my $f = $fileline;
2688          $f =~ s/:\d+$//;
2689          push(@result, $f);
2690        }
2691      } else {
2692        push(@result, $a);
2693        last;  # Do not print inlined info
2694      }
2695    }
2696  }
2697
2698  # print join(",", @addrs), " => ", join(",", @result), "\n";
2699  return @result;
2700}
2701
2702# Generate percent string for a number and a total
2703sub Percent {
2704  my $num = shift;
2705  my $tot = shift;
2706  if ($tot != 0) {
2707    return sprintf("%.1f%%", $num * 100.0 / $tot);
2708  } else {
2709    return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf");
2710  }
2711}
2712
2713# Generate pretty-printed form of number
2714sub Unparse {
2715  my $num = shift;
2716  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
2717    if ($main::opt_inuse_objects || $main::opt_alloc_objects) {
2718      return sprintf("%d", $num);
2719    } else {
2720      if ($main::opt_show_bytes) {
2721        return sprintf("%d", $num);
2722      } else {
2723        return sprintf("%.1f", $num / 1048576.0);
2724      }
2725    }
2726  } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) {
2727    return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds
2728  } else {
2729    return sprintf("%d", $num);
2730  }
2731}
2732
2733# Alternate pretty-printed form: 0 maps to "."
2734sub UnparseAlt {
2735  my $num = shift;
2736  if ($num == 0) {
2737    return ".";
2738  } else {
2739    return Unparse($num);
2740  }
2741}
2742
2743# Alternate pretty-printed form: 0 maps to ""
2744sub HtmlPrintNumber {
2745  my $num = shift;
2746  if ($num == 0) {
2747    return "";
2748  } else {
2749    return Unparse($num);
2750  }
2751}
2752
2753# Return output units
2754sub Units {
2755  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
2756    if ($main::opt_inuse_objects || $main::opt_alloc_objects) {
2757      return "objects";
2758    } else {
2759      if ($main::opt_show_bytes) {
2760        return "B";
2761      } else {
2762        return "MB";
2763      }
2764    }
2765  } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) {
2766    return "seconds";
2767  } else {
2768    return "samples";
2769  }
2770}
2771
2772##### Profile manipulation code #####
2773
2774# Generate flattened profile:
2775# If count is charged to stack [a,b,c,d], in generated profile,
2776# it will be charged to [a]
2777sub FlatProfile {
2778  my $profile = shift;
2779  my $result = {};
2780  foreach my $k (keys(%{$profile})) {
2781    my $count = $profile->{$k};
2782    my @addrs = split(/\n/, $k);
2783    if ($#addrs >= 0) {
2784      AddEntry($result, $addrs[0], $count);
2785    }
2786  }
2787  return $result;
2788}
2789
2790# Generate cumulative profile:
2791# If count is charged to stack [a,b,c,d], in generated profile,
2792# it will be charged to [a], [b], [c], [d]
2793sub CumulativeProfile {
2794  my $profile = shift;
2795  my $result = {};
2796  foreach my $k (keys(%{$profile})) {
2797    my $count = $profile->{$k};
2798    my @addrs = split(/\n/, $k);
2799    foreach my $a (@addrs) {
2800      AddEntry($result, $a, $count);
2801    }
2802  }
2803  return $result;
2804}
2805
2806# If the second-youngest PC on the stack is always the same, returns
2807# that pc.  Otherwise, returns undef.
2808sub IsSecondPcAlwaysTheSame {
2809  my $profile = shift;
2810
2811  my $second_pc = undef;
2812  foreach my $k (keys(%{$profile})) {
2813    my @addrs = split(/\n/, $k);
2814    if ($#addrs < 1) {
2815      return undef;
2816    }
2817    if (not defined $second_pc) {
2818      $second_pc = $addrs[1];
2819    } else {
2820      if ($second_pc ne $addrs[1]) {
2821        return undef;
2822      }
2823    }
2824  }
2825  return $second_pc;
2826}
2827
2828sub ExtractSymbolNameInlineStack {
2829  my $symbols = shift;
2830  my $address = shift;
2831
2832  my @stack = ();
2833
2834  if (exists $symbols->{$address}) {
2835    my @localinlinestack = @{$symbols->{$address}};
2836    for (my $i = $#localinlinestack; $i > 0; $i-=3) {
2837      my $file = $localinlinestack[$i-1];
2838      my $fn = $localinlinestack[$i-0];
2839
2840      if ($file eq "?" || $file eq ":0") {
2841        $file = "??:0";
2842      }
2843      if ($fn eq '??') {
2844        # If we can't get the symbol name, at least use the file information.
2845        $fn = $file;
2846      }
2847      my $suffix = "[inline]";
2848      if ($i == 2) {
2849        $suffix = "";
2850      }
2851      push (@stack, $fn.$suffix);
2852    }
2853  }
2854  else {
2855    # If we can't get a symbol name, at least fill in the address.
2856    push (@stack, $address);
2857  }
2858
2859  return @stack;
2860}
2861
2862sub ExtractSymbolLocation {
2863  my $symbols = shift;
2864  my $address = shift;
2865  # 'addr2line' outputs "??:0" for unknown locations; we do the
2866  # same to be consistent.
2867  my $location = "??:0:unknown";
2868  if (exists $symbols->{$address}) {
2869    my $file = $symbols->{$address}->[1];
2870    if ($file eq "?") {
2871      $file = "??:0"
2872    }
2873    $location = $file . ":" . $symbols->{$address}->[0];
2874  }
2875  return $location;
2876}
2877
2878# Extracts a graph of calls.
2879sub ExtractCalls {
2880  my $symbols = shift;
2881  my $profile = shift;
2882
2883  my $calls = {};
2884  while( my ($stack_trace, $count) = each %$profile ) {
2885    my @address = split(/\n/, $stack_trace);
2886    my $destination = ExtractSymbolLocation($symbols, $address[0]);
2887    AddEntry($calls, $destination, $count);
2888    for (my $i = 1; $i <= $#address; $i++) {
2889      my $source = ExtractSymbolLocation($symbols, $address[$i]);
2890      my $call = "$source -> $destination";
2891      AddEntry($calls, $call, $count);
2892      $destination = $source;
2893    }
2894  }
2895
2896  return $calls;
2897}
2898
2899sub FilterFrames {
2900  my $symbols = shift;
2901  my $profile = shift;
2902
2903  if ($main::opt_retain eq '' && $main::opt_exclude eq '') {
2904    return $profile;
2905  }
2906
2907  my $result = {};
2908  foreach my $k (keys(%{$profile})) {
2909    my $count = $profile->{$k};
2910    my @addrs = split(/\n/, $k);
2911    my @path = ();
2912    foreach my $a (@addrs) {
2913      my $sym;
2914      if (exists($symbols->{$a})) {
2915        $sym = $symbols->{$a}->[0];
2916      } else {
2917        $sym = $a;
2918      }
2919      if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) {
2920        next;
2921      }
2922      if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) {
2923        next;
2924      }
2925      push(@path, $a);
2926    }
2927    if (scalar(@path) > 0) {
2928      my $reduced_path = join("\n", @path);
2929      AddEntry($result, $reduced_path, $count);
2930    }
2931  }
2932
2933  return $result;
2934}
2935
2936sub PrintCollapsedStacks {
2937  my $symbols = shift;
2938  my $profile = shift;
2939
2940  while (my ($stack_trace, $count) = each %$profile) {
2941    my @address = split(/\n/, $stack_trace);
2942    my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address );
2943    printf("%s %d\n", join(";", @names), $count);
2944  }
2945}
2946
2947sub RemoveUninterestingFrames {
2948  my $symbols = shift;
2949  my $profile = shift;
2950
2951  # List of function names to skip
2952  my %skip = ();
2953  my $skip_regexp = 'NOMATCH';
2954  if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') {
2955    foreach my $name ('@JEMALLOC_PREFIX@calloc',
2956                      'cfree',
2957                      '@JEMALLOC_PREFIX@malloc',
2958                      'newImpl',
2959                      'void* newImpl',
2960                      '@JEMALLOC_PREFIX@free',
2961                      '@JEMALLOC_PREFIX@memalign',
2962                      '@JEMALLOC_PREFIX@posix_memalign',
2963                      '@JEMALLOC_PREFIX@aligned_alloc',
2964                      'pvalloc',
2965                      '@JEMALLOC_PREFIX@valloc',
2966                      '@JEMALLOC_PREFIX@realloc',
2967                      '@JEMALLOC_PREFIX@mallocx',
2968                      '@JEMALLOC_PREFIX@rallocx',
2969                      '@JEMALLOC_PREFIX@xallocx',
2970                      '@JEMALLOC_PREFIX@dallocx',
2971                      '@JEMALLOC_PREFIX@sdallocx',
2972                      '@JEMALLOC_PREFIX@sdallocx_noflags',
2973                      'tc_calloc',
2974                      'tc_cfree',
2975                      'tc_malloc',
2976                      'tc_free',
2977                      'tc_memalign',
2978                      'tc_posix_memalign',
2979                      'tc_pvalloc',
2980                      'tc_valloc',
2981                      'tc_realloc',
2982                      'tc_new',
2983                      'tc_delete',
2984                      'tc_newarray',
2985                      'tc_deletearray',
2986                      'tc_new_nothrow',
2987                      'tc_newarray_nothrow',
2988                      'do_malloc',
2989                      '::do_malloc',   # new name -- got moved to an unnamed ns
2990                      '::do_malloc_or_cpp_alloc',
2991                      'DoSampledAllocation',
2992                      'simple_alloc::allocate',
2993                      '__malloc_alloc_template::allocate',
2994                      '__builtin_delete',
2995                      '__builtin_new',
2996                      '__builtin_vec_delete',
2997                      '__builtin_vec_new',
2998                      'operator new',
2999                      'operator new[]',
3000                      # The entry to our memory-allocation routines on OS X
3001                      'malloc_zone_malloc',
3002                      'malloc_zone_calloc',
3003                      'malloc_zone_valloc',
3004                      'malloc_zone_realloc',
3005                      'malloc_zone_memalign',
3006                      'malloc_zone_free',
3007                      # These mark the beginning/end of our custom sections
3008                      '__start_google_malloc',
3009                      '__stop_google_malloc',
3010                      '__start_malloc_hook',
3011                      '__stop_malloc_hook') {
3012      $skip{$name} = 1;
3013      $skip{"_" . $name} = 1;   # Mach (OS X) adds a _ prefix to everything
3014    }
3015    # TODO: Remove TCMalloc once everything has been
3016    # moved into the tcmalloc:: namespace and we have flushed
3017    # old code out of the system.
3018    $skip_regexp = "TCMalloc|^tcmalloc::";
3019  } elsif ($main::profile_type eq 'contention') {
3020    foreach my $vname ('base::RecordLockProfileData',
3021                       'base::SubmitMutexProfileData',
3022                       'base::SubmitSpinLockProfileData',
3023                       'Mutex::Unlock',
3024                       'Mutex::UnlockSlow',
3025                       'Mutex::ReaderUnlock',
3026                       'MutexLock::~MutexLock',
3027                       'SpinLock::Unlock',
3028                       'SpinLock::SlowUnlock',
3029                       'SpinLockHolder::~SpinLockHolder') {
3030      $skip{$vname} = 1;
3031    }
3032  } elsif ($main::profile_type eq 'cpu') {
3033    # Drop signal handlers used for CPU profile collection
3034    # TODO(dpeng): this should not be necessary; it's taken
3035    # care of by the general 2nd-pc mechanism below.
3036    foreach my $name ('ProfileData::Add',           # historical
3037                      'ProfileData::prof_handler',  # historical
3038                      'CpuProfiler::prof_handler',
3039                      '__FRAME_END__',
3040                      '__pthread_sighandler',
3041                      '__restore') {
3042      $skip{$name} = 1;
3043    }
3044  } else {
3045    # Nothing skipped for unknown types
3046  }
3047
3048  if ($main::profile_type eq 'cpu') {
3049    # If all the second-youngest program counters are the same,
3050    # this STRONGLY suggests that it is an artifact of measurement,
3051    # i.e., stack frames pushed by the CPU profiler signal handler.
3052    # Hence, we delete them.
3053    # (The topmost PC is read from the signal structure, not from
3054    # the stack, so it does not get involved.)
3055    while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) {
3056      my $result = {};
3057      my $func = '';
3058      if (exists($symbols->{$second_pc})) {
3059        $second_pc = $symbols->{$second_pc}->[0];
3060      }
3061      print STDERR "Removing $second_pc from all stack traces.\n";
3062      foreach my $k (keys(%{$profile})) {
3063        my $count = $profile->{$k};
3064        my @addrs = split(/\n/, $k);
3065        splice @addrs, 1, 1;
3066        my $reduced_path = join("\n", @addrs);
3067        AddEntry($result, $reduced_path, $count);
3068      }
3069      $profile = $result;
3070    }
3071  }
3072
3073  my $result = {};
3074  foreach my $k (keys(%{$profile})) {
3075    my $count = $profile->{$k};
3076    my @addrs = split(/\n/, $k);
3077    my @path = ();
3078    foreach my $a (@addrs) {
3079      if (exists($symbols->{$a})) {
3080        my $func = $symbols->{$a}->[0];
3081        if ($skip{$func} || ($func =~ m/$skip_regexp/)) {
3082          # Throw away the portion of the backtrace seen so far, under the
3083          # assumption that previous frames were for functions internal to the
3084          # allocator.
3085          @path = ();
3086          next;
3087        }
3088      }
3089      push(@path, $a);
3090    }
3091    my $reduced_path = join("\n", @path);
3092    AddEntry($result, $reduced_path, $count);
3093  }
3094
3095  $result = FilterFrames($symbols, $result);
3096
3097  return $result;
3098}
3099
3100# Reduce profile to granularity given by user
3101sub ReduceProfile {
3102  my $symbols = shift;
3103  my $profile = shift;
3104  my $result = {};
3105  my $fullname_to_shortname_map = {};
3106  FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map);
3107  foreach my $k (keys(%{$profile})) {
3108    my $count = $profile->{$k};
3109    my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k);
3110    my @path = ();
3111    my %seen = ();
3112    $seen{''} = 1;      # So that empty keys are skipped
3113    foreach my $e (@translated) {
3114      # To avoid double-counting due to recursion, skip a stack-trace
3115      # entry if it has already been seen
3116      if (!$seen{$e}) {
3117        $seen{$e} = 1;
3118        push(@path, $e);
3119      }
3120    }
3121    my $reduced_path = join("\n", @path);
3122    AddEntry($result, $reduced_path, $count);
3123  }
3124  return $result;
3125}
3126
3127# Does the specified symbol array match the regexp?
3128sub SymbolMatches {
3129  my $sym = shift;
3130  my $re = shift;
3131  if (defined($sym)) {
3132    for (my $i = 0; $i < $#{$sym}; $i += 3) {
3133      if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) {
3134        return 1;
3135      }
3136    }
3137  }
3138  return 0;
3139}
3140
3141# Focus only on paths involving specified regexps
3142sub FocusProfile {
3143  my $symbols = shift;
3144  my $profile = shift;
3145  my $focus = shift;
3146  my $result = {};
3147  foreach my $k (keys(%{$profile})) {
3148    my $count = $profile->{$k};
3149    my @addrs = split(/\n/, $k);
3150    foreach my $a (@addrs) {
3151      # Reply if it matches either the address/shortname/fileline
3152      if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) {
3153        AddEntry($result, $k, $count);
3154        last;
3155      }
3156    }
3157  }
3158  return $result;
3159}
3160
3161# Focus only on paths not involving specified regexps
3162sub IgnoreProfile {
3163  my $symbols = shift;
3164  my $profile = shift;
3165  my $ignore = shift;
3166  my $result = {};
3167  foreach my $k (keys(%{$profile})) {
3168    my $count = $profile->{$k};
3169    my @addrs = split(/\n/, $k);
3170    my $matched = 0;
3171    foreach my $a (@addrs) {
3172      # Reply if it matches either the address/shortname/fileline
3173      if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) {
3174        $matched = 1;
3175        last;
3176      }
3177    }
3178    if (!$matched) {
3179      AddEntry($result, $k, $count);
3180    }
3181  }
3182  return $result;
3183}
3184
3185# Get total count in profile
3186sub TotalProfile {
3187  my $profile = shift;
3188  my $result = 0;
3189  foreach my $k (keys(%{$profile})) {
3190    $result += $profile->{$k};
3191  }
3192  return $result;
3193}
3194
3195# Add A to B
3196sub AddProfile {
3197  my $A = shift;
3198  my $B = shift;
3199
3200  my $R = {};
3201  # add all keys in A
3202  foreach my $k (keys(%{$A})) {
3203    my $v = $A->{$k};
3204    AddEntry($R, $k, $v);
3205  }
3206  # add all keys in B
3207  foreach my $k (keys(%{$B})) {
3208    my $v = $B->{$k};
3209    AddEntry($R, $k, $v);
3210  }
3211  return $R;
3212}
3213
3214# Merges symbol maps
3215sub MergeSymbols {
3216  my $A = shift;
3217  my $B = shift;
3218
3219  my $R = {};
3220  foreach my $k (keys(%{$A})) {
3221    $R->{$k} = $A->{$k};
3222  }
3223  if (defined($B)) {
3224    foreach my $k (keys(%{$B})) {
3225      $R->{$k} = $B->{$k};
3226    }
3227  }
3228  return $R;
3229}
3230
3231
3232# Add A to B
3233sub AddPcs {
3234  my $A = shift;
3235  my $B = shift;
3236
3237  my $R = {};
3238  # add all keys in A
3239  foreach my $k (keys(%{$A})) {
3240    $R->{$k} = 1
3241  }
3242  # add all keys in B
3243  foreach my $k (keys(%{$B})) {
3244    $R->{$k} = 1
3245  }
3246  return $R;
3247}
3248
3249# Subtract B from A
3250sub SubtractProfile {
3251  my $A = shift;
3252  my $B = shift;
3253
3254  my $R = {};
3255  foreach my $k (keys(%{$A})) {
3256    my $v = $A->{$k} - GetEntry($B, $k);
3257    if ($v < 0 && $main::opt_drop_negative) {
3258      $v = 0;
3259    }
3260    AddEntry($R, $k, $v);
3261  }
3262  if (!$main::opt_drop_negative) {
3263    # Take care of when subtracted profile has more entries
3264    foreach my $k (keys(%{$B})) {
3265      if (!exists($A->{$k})) {
3266        AddEntry($R, $k, 0 - $B->{$k});
3267      }
3268    }
3269  }
3270  return $R;
3271}
3272
3273# Get entry from profile; zero if not present
3274sub GetEntry {
3275  my $profile = shift;
3276  my $k = shift;
3277  if (exists($profile->{$k})) {
3278    return $profile->{$k};
3279  } else {
3280    return 0;
3281  }
3282}
3283
3284# Add entry to specified profile
3285sub AddEntry {
3286  my $profile = shift;
3287  my $k = shift;
3288  my $n = shift;
3289  if (!exists($profile->{$k})) {
3290    $profile->{$k} = 0;
3291  }
3292  $profile->{$k} += $n;
3293}
3294
3295# Add a stack of entries to specified profile, and add them to the $pcs
3296# list.
3297sub AddEntries {
3298  my $profile = shift;
3299  my $pcs = shift;
3300  my $stack = shift;
3301  my $count = shift;
3302  my @k = ();
3303
3304  foreach my $e (split(/\s+/, $stack)) {
3305    my $pc = HexExtend($e);
3306    $pcs->{$pc} = 1;
3307    push @k, $pc;
3308  }
3309  AddEntry($profile, (join "\n", @k), $count);
3310}
3311
3312##### Code to profile a server dynamically #####
3313
3314sub CheckSymbolPage {
3315  my $url = SymbolPageURL();
3316  my $command = ShellEscape(@URL_FETCHER, $url);
3317  open(SYMBOL, "$command |") or error($command);
3318  my $line = <SYMBOL>;
3319  $line =~ s/\r//g;         # turn windows-looking lines into unix-looking lines
3320  close(SYMBOL);
3321  unless (defined($line)) {
3322    error("$url doesn't exist\n");
3323  }
3324
3325  if ($line =~ /^num_symbols:\s+(\d+)$/) {
3326    if ($1 == 0) {
3327      error("Stripped binary. No symbols available.\n");
3328    }
3329  } else {
3330    error("Failed to get the number of symbols from $url\n");
3331  }
3332}
3333
3334sub IsProfileURL {
3335  my $profile_name = shift;
3336  if (-f $profile_name) {
3337    printf STDERR "Using local file $profile_name.\n";
3338    return 0;
3339  }
3340  return 1;
3341}
3342
3343sub ParseProfileURL {
3344  my $profile_name = shift;
3345
3346  if (!defined($profile_name) || $profile_name eq "") {
3347    return ();
3348  }
3349
3350  # Split profile URL - matches all non-empty strings, so no test.
3351  $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,;
3352
3353  my $proto = $1 || "http://";
3354  my $hostport = $2;
3355  my $prefix = $3;
3356  my $profile = $4 || "/";
3357
3358  my $host = $hostport;
3359  $host =~ s/:.*//;
3360
3361  my $baseurl = "$proto$hostport$prefix";
3362  return ($host, $baseurl, $profile);
3363}
3364
3365# We fetch symbols from the first profile argument.
3366sub SymbolPageURL {
3367  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
3368  return "$baseURL$SYMBOL_PAGE";
3369}
3370
3371sub FetchProgramName() {
3372  my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]);
3373  my $url = "$baseURL$PROGRAM_NAME_PAGE";
3374  my $command_line = ShellEscape(@URL_FETCHER, $url);
3375  open(CMDLINE, "$command_line |") or error($command_line);
3376  my $cmdline = <CMDLINE>;
3377  $cmdline =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
3378  close(CMDLINE);
3379  error("Failed to get program name from $url\n") unless defined($cmdline);
3380  $cmdline =~ s/\x00.+//;  # Remove argv[1] and latters.
3381  $cmdline =~ s!\n!!g;  # Remove LFs.
3382  return $cmdline;
3383}
3384
3385# Gee, curl's -L (--location) option isn't reliable at least
3386# with its 7.12.3 version.  Curl will forget to post data if
3387# there is a redirection.  This function is a workaround for
3388# curl.  Redirection happens on borg hosts.
3389sub ResolveRedirectionForCurl {
3390  my $url = shift;
3391  my $command_line = ShellEscape(@URL_FETCHER, "--head", $url);
3392  open(CMDLINE, "$command_line |") or error($command_line);
3393  while (<CMDLINE>) {
3394    s/\r//g;         # turn windows-looking lines into unix-looking lines
3395    if (/^Location: (.*)/) {
3396      $url = $1;
3397    }
3398  }
3399  close(CMDLINE);
3400  return $url;
3401}
3402
3403# Add a timeout flat to URL_FETCHER.  Returns a new list.
3404sub AddFetchTimeout {
3405  my $timeout = shift;
3406  my @fetcher = @_;
3407  if (defined($timeout)) {
3408    if (join(" ", @fetcher) =~ m/\bcurl -s/) {
3409      push(@fetcher, "--max-time", sprintf("%d", $timeout));
3410    } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) {
3411      push(@fetcher, sprintf("--deadline=%d", $timeout));
3412    }
3413  }
3414  return @fetcher;
3415}
3416
3417# Reads a symbol map from the file handle name given as $1, returning
3418# the resulting symbol map.  Also processes variables relating to symbols.
3419# Currently, the only variable processed is 'binary=<value>' which updates
3420# $main::prog to have the correct program name.
3421sub ReadSymbols {
3422  my $in = shift;
3423  my $map = {};
3424  while (<$in>) {
3425    s/\r//g;         # turn windows-looking lines into unix-looking lines
3426    # Removes all the leading zeroes from the symbols, see comment below.
3427    if (m/^0x0*([0-9a-f]+)\s+(.+)/) {
3428      $map->{$1} = $2;
3429    } elsif (m/^---/) {
3430      last;
3431    } elsif (m/^([a-z][^=]*)=(.*)$/ ) {
3432      my ($variable, $value) = ($1, $2);
3433      for ($variable, $value) {
3434        s/^\s+//;
3435        s/\s+$//;
3436      }
3437      if ($variable eq "binary") {
3438        if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) {
3439          printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n",
3440                         $main::prog, $value);
3441        }
3442        $main::prog = $value;
3443      } else {
3444        printf STDERR ("Ignoring unknown variable in symbols list: " .
3445            "'%s' = '%s'\n", $variable, $value);
3446      }
3447    }
3448  }
3449  return $map;
3450}
3451
3452sub URLEncode {
3453  my $str = shift;
3454  $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg;
3455  return $str;
3456}
3457
3458sub AppendSymbolFilterParams {
3459  my $url = shift;
3460  my @params = ();
3461  if ($main::opt_retain ne '') {
3462    push(@params, sprintf("retain=%s", URLEncode($main::opt_retain)));
3463  }
3464  if ($main::opt_exclude ne '') {
3465    push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude)));
3466  }
3467  if (scalar @params > 0) {
3468    $url = sprintf("%s?%s", $url, join("&", @params));
3469  }
3470  return $url;
3471}
3472
3473# Fetches and processes symbols to prepare them for use in the profile output
3474# code.  If the optional 'symbol_map' arg is not given, fetches symbols from
3475# $SYMBOL_PAGE for all PC values found in profile.  Otherwise, the raw symbols
3476# are assumed to have already been fetched into 'symbol_map' and are simply
3477# extracted and processed.
3478sub FetchSymbols {
3479  my $pcset = shift;
3480  my $symbol_map = shift;
3481
3482  my %seen = ();
3483  my @pcs = grep { !$seen{$_}++ } keys(%$pcset);  # uniq
3484
3485  if (!defined($symbol_map)) {
3486    my $post_data = join("+", sort((map {"0x" . "$_"} @pcs)));
3487
3488    open(POSTFILE, ">$main::tmpfile_sym");
3489    print POSTFILE $post_data;
3490    close(POSTFILE);
3491
3492    my $url = SymbolPageURL();
3493
3494    my $command_line;
3495    if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) {
3496      $url = ResolveRedirectionForCurl($url);
3497      $url = AppendSymbolFilterParams($url);
3498      $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym",
3499                                  $url);
3500    } else {
3501      $url = AppendSymbolFilterParams($url);
3502      $command_line = (ShellEscape(@URL_FETCHER, "--post", $url)
3503                       . " < " . ShellEscape($main::tmpfile_sym));
3504    }
3505    # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols.
3506    my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"});
3507    open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line);
3508    $symbol_map = ReadSymbols(*SYMBOL{IO});
3509    close(SYMBOL);
3510  }
3511
3512  my $symbols = {};
3513  foreach my $pc (@pcs) {
3514    my $fullname;
3515    # For 64 bits binaries, symbols are extracted with 8 leading zeroes.
3516    # Then /symbol reads the long symbols in as uint64, and outputs
3517    # the result with a "0x%08llx" format which get rid of the zeroes.
3518    # By removing all the leading zeroes in both $pc and the symbols from
3519    # /symbol, the symbols match and are retrievable from the map.
3520    my $shortpc = $pc;
3521    $shortpc =~ s/^0*//;
3522    # Each line may have a list of names, which includes the function
3523    # and also other functions it has inlined.  They are separated (in
3524    # PrintSymbolizedProfile), by --, which is illegal in function names.
3525    my $fullnames;
3526    if (defined($symbol_map->{$shortpc})) {
3527      $fullnames = $symbol_map->{$shortpc};
3528    } else {
3529      $fullnames = "0x" . $pc;  # Just use addresses
3530    }
3531    my $sym = [];
3532    $symbols->{$pc} = $sym;
3533    foreach my $fullname (split("--", $fullnames)) {
3534      my $name = ShortFunctionName($fullname);
3535      push(@{$sym}, $name, "?", $fullname);
3536    }
3537  }
3538  return $symbols;
3539}
3540
3541sub BaseName {
3542  my $file_name = shift;
3543  $file_name =~ s!^.*/!!;  # Remove directory name
3544  return $file_name;
3545}
3546
3547sub MakeProfileBaseName {
3548  my ($binary_name, $profile_name) = @_;
3549  my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
3550  my $binary_shortname = BaseName($binary_name);
3551  return sprintf("%s.%s.%s",
3552                 $binary_shortname, $main::op_time, $host);
3553}
3554
3555sub FetchDynamicProfile {
3556  my $binary_name = shift;
3557  my $profile_name = shift;
3558  my $fetch_name_only = shift;
3559  my $encourage_patience = shift;
3560
3561  if (!IsProfileURL($profile_name)) {
3562    return $profile_name;
3563  } else {
3564    my ($host, $baseURL, $path) = ParseProfileURL($profile_name);
3565    if ($path eq "" || $path eq "/") {
3566      # Missing type specifier defaults to cpu-profile
3567      $path = $PROFILE_PAGE;
3568    }
3569
3570    my $profile_file = MakeProfileBaseName($binary_name, $profile_name);
3571
3572    my $url = "$baseURL$path";
3573    my $fetch_timeout = undef;
3574    if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) {
3575      if ($path =~ m/[?]/) {
3576        $url .= "&";
3577      } else {
3578        $url .= "?";
3579      }
3580      $url .= sprintf("seconds=%d", $main::opt_seconds);
3581      $fetch_timeout = $main::opt_seconds * 1.01 + 60;
3582      # Set $profile_type for consumption by PrintSymbolizedProfile.
3583      $main::profile_type = 'cpu';
3584    } else {
3585      # For non-CPU profiles, we add a type-extension to
3586      # the target profile file name.
3587      my $suffix = $path;
3588      $suffix =~ s,/,.,g;
3589      $profile_file .= $suffix;
3590      # Set $profile_type for consumption by PrintSymbolizedProfile.
3591      if ($path =~ m/$HEAP_PAGE/) {
3592        $main::profile_type = 'heap';
3593      } elsif ($path =~ m/$GROWTH_PAGE/) {
3594        $main::profile_type = 'growth';
3595      } elsif ($path =~ m/$CONTENTION_PAGE/) {
3596        $main::profile_type = 'contention';
3597      }
3598    }
3599
3600    my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof");
3601    if (! -d $profile_dir) {
3602      mkdir($profile_dir)
3603          || die("Unable to create profile directory $profile_dir: $!\n");
3604    }
3605    my $tmp_profile = "$profile_dir/.tmp.$profile_file";
3606    my $real_profile = "$profile_dir/$profile_file";
3607
3608    if ($fetch_name_only > 0) {
3609      return $real_profile;
3610    }
3611
3612    my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER);
3613    my $cmd = ShellEscape(@fetcher, $url) . " > " . ShellEscape($tmp_profile);
3614    if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){
3615      print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n  ${real_profile}\n";
3616      if ($encourage_patience) {
3617        print STDERR "Be patient...\n";
3618      }
3619    } else {
3620      print STDERR "Fetching $path profile from $url to\n  ${real_profile}\n";
3621    }
3622
3623    (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n");
3624    (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n");
3625    print STDERR "Wrote profile to $real_profile\n";
3626    $main::collected_profile = $real_profile;
3627    return $main::collected_profile;
3628  }
3629}
3630
3631# Collect profiles in parallel
3632sub FetchDynamicProfiles {
3633  my $items = scalar(@main::pfile_args);
3634  my $levels = log($items) / log(2);
3635
3636  if ($items == 1) {
3637    $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1);
3638  } else {
3639    # math rounding issues
3640    if ((2 ** $levels) < $items) {
3641     $levels++;
3642    }
3643    my $count = scalar(@main::pfile_args);
3644    for (my $i = 0; $i < $count; $i++) {
3645      $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0);
3646    }
3647    print STDERR "Fetching $count profiles, Be patient...\n";
3648    FetchDynamicProfilesRecurse($levels, 0, 0);
3649    $main::collected_profile = join(" \\\n    ", @main::profile_files);
3650  }
3651}
3652
3653# Recursively fork a process to get enough processes
3654# collecting profiles
3655sub FetchDynamicProfilesRecurse {
3656  my $maxlevel = shift;
3657  my $level = shift;
3658  my $position = shift;
3659
3660  if (my $pid = fork()) {
3661    $position = 0 | ($position << 1);
3662    TryCollectProfile($maxlevel, $level, $position);
3663    wait;
3664  } else {
3665    $position = 1 | ($position << 1);
3666    TryCollectProfile($maxlevel, $level, $position);
3667    cleanup();
3668    exit(0);
3669  }
3670}
3671
3672# Collect a single profile
3673sub TryCollectProfile {
3674  my $maxlevel = shift;
3675  my $level = shift;
3676  my $position = shift;
3677
3678  if ($level >= ($maxlevel - 1)) {
3679    if ($position < scalar(@main::pfile_args)) {
3680      FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0);
3681    }
3682  } else {
3683    FetchDynamicProfilesRecurse($maxlevel, $level+1, $position);
3684  }
3685}
3686
3687##### Parsing code #####
3688
3689# Provide a small streaming-read module to handle very large
3690# cpu-profile files.  Stream in chunks along a sliding window.
3691# Provides an interface to get one 'slot', correctly handling
3692# endian-ness differences.  A slot is one 32-bit or 64-bit word
3693# (depending on the input profile).  We tell endianness and bit-size
3694# for the profile by looking at the first 8 bytes: in cpu profiles,
3695# the second slot is always 3 (we'll accept anything that's not 0).
3696BEGIN {
3697  package CpuProfileStream;
3698
3699  sub new {
3700    my ($class, $file, $fname) = @_;
3701    my $self = { file        => $file,
3702                 base        => 0,
3703                 stride      => 512 * 1024,   # must be a multiple of bitsize/8
3704                 slots       => [],
3705                 unpack_code => "",           # N for big-endian, V for little
3706                 perl_is_64bit => 1,          # matters if profile is 64-bit
3707    };
3708    bless $self, $class;
3709    # Let unittests adjust the stride
3710    if ($main::opt_test_stride > 0) {
3711      $self->{stride} = $main::opt_test_stride;
3712    }
3713    # Read the first two slots to figure out bitsize and endianness.
3714    my $slots = $self->{slots};
3715    my $str;
3716    read($self->{file}, $str, 8);
3717    # Set the global $address_length based on what we see here.
3718    # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars).
3719    $address_length = ($str eq (chr(0)x8)) ? 16 : 8;
3720    if ($address_length == 8) {
3721      if (substr($str, 6, 2) eq chr(0)x2) {
3722        $self->{unpack_code} = 'V';  # Little-endian.
3723      } elsif (substr($str, 4, 2) eq chr(0)x2) {
3724        $self->{unpack_code} = 'N';  # Big-endian
3725      } else {
3726        ::error("$fname: header size >= 2**16\n");
3727      }
3728      @$slots = unpack($self->{unpack_code} . "*", $str);
3729    } else {
3730      # If we're a 64-bit profile, check if we're a 64-bit-capable
3731      # perl.  Otherwise, each slot will be represented as a float
3732      # instead of an int64, losing precision and making all the
3733      # 64-bit addresses wrong.  We won't complain yet, but will
3734      # later if we ever see a value that doesn't fit in 32 bits.
3735      my $has_q = 0;
3736      eval { $has_q = pack("Q", "1") ? 1 : 1; };
3737      if (!$has_q) {
3738        $self->{perl_is_64bit} = 0;
3739      }
3740      read($self->{file}, $str, 8);
3741      if (substr($str, 4, 4) eq chr(0)x4) {
3742        # We'd love to use 'Q', but it's a) not universal, b) not endian-proof.
3743        $self->{unpack_code} = 'V';  # Little-endian.
3744      } elsif (substr($str, 0, 4) eq chr(0)x4) {
3745        $self->{unpack_code} = 'N';  # Big-endian
3746      } else {
3747        ::error("$fname: header size >= 2**32\n");
3748      }
3749      my @pair = unpack($self->{unpack_code} . "*", $str);
3750      # Since we know one of the pair is 0, it's fine to just add them.
3751      @$slots = (0, $pair[0] + $pair[1]);
3752    }
3753    return $self;
3754  }
3755
3756  # Load more data when we access slots->get(X) which is not yet in memory.
3757  sub overflow {
3758    my ($self) = @_;
3759    my $slots = $self->{slots};
3760    $self->{base} += $#$slots + 1;   # skip over data we're replacing
3761    my $str;
3762    read($self->{file}, $str, $self->{stride});
3763    if ($address_length == 8) {      # the 32-bit case
3764      # This is the easy case: unpack provides 32-bit unpacking primitives.
3765      @$slots = unpack($self->{unpack_code} . "*", $str);
3766    } else {
3767      # We need to unpack 32 bits at a time and combine.
3768      my @b32_values = unpack($self->{unpack_code} . "*", $str);
3769      my @b64_values = ();
3770      for (my $i = 0; $i < $#b32_values; $i += 2) {
3771        # TODO(csilvers): if this is a 32-bit perl, the math below
3772        #    could end up in a too-large int, which perl will promote
3773        #    to a double, losing necessary precision.  Deal with that.
3774        #    Right now, we just die.
3775        my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]);
3776        if ($self->{unpack_code} eq 'N') {    # big-endian
3777          ($lo, $hi) = ($hi, $lo);
3778        }
3779        my $value = $lo + $hi * (2**32);
3780        if (!$self->{perl_is_64bit} &&   # check value is exactly represented
3781            (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) {
3782          ::error("Need a 64-bit perl to process this 64-bit profile.\n");
3783        }
3784        push(@b64_values, $value);
3785      }
3786      @$slots = @b64_values;
3787    }
3788  }
3789
3790  # Access the i-th long in the file (logically), or -1 at EOF.
3791  sub get {
3792    my ($self, $idx) = @_;
3793    my $slots = $self->{slots};
3794    while ($#$slots >= 0) {
3795      if ($idx < $self->{base}) {
3796        # The only time we expect a reference to $slots[$i - something]
3797        # after referencing $slots[$i] is reading the very first header.
3798        # Since $stride > |header|, that shouldn't cause any lookback
3799        # errors.  And everything after the header is sequential.
3800        print STDERR "Unexpected look-back reading CPU profile";
3801        return -1;   # shrug, don't know what better to return
3802      } elsif ($idx > $self->{base} + $#$slots) {
3803        $self->overflow();
3804      } else {
3805        return $slots->[$idx - $self->{base}];
3806      }
3807    }
3808    # If we get here, $slots is [], which means we've reached EOF
3809    return -1;  # unique since slots is supposed to hold unsigned numbers
3810  }
3811}
3812
3813# Reads the top, 'header' section of a profile, and returns the last
3814# line of the header, commonly called a 'header line'.  The header
3815# section of a profile consists of zero or more 'command' lines that
3816# are instructions to jeprof, which jeprof executes when reading the
3817# header.  All 'command' lines start with a %.  After the command
3818# lines is the 'header line', which is a profile-specific line that
3819# indicates what type of profile it is, and perhaps other global
3820# information about the profile.  For instance, here's a header line
3821# for a heap profile:
3822#   heap profile:     53:    38236 [  5525:  1284029] @ heapprofile
3823# For historical reasons, the CPU profile does not contain a text-
3824# readable header line.  If the profile looks like a CPU profile,
3825# this function returns "".  If no header line could be found, this
3826# function returns undef.
3827#
3828# The following commands are recognized:
3829#   %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:'
3830#
3831# The input file should be in binmode.
3832sub ReadProfileHeader {
3833  local *PROFILE = shift;
3834  my $firstchar = "";
3835  my $line = "";
3836  read(PROFILE, $firstchar, 1);
3837  seek(PROFILE, -1, 1);                    # unread the firstchar
3838  if ($firstchar !~ /[[:print:]]/) {       # is not a text character
3839    return "";
3840  }
3841  while (defined($line = <PROFILE>)) {
3842    $line =~ s/\r//g;   # turn windows-looking lines into unix-looking lines
3843    if ($line =~ /^%warn\s+(.*)/) {        # 'warn' command
3844      # Note this matches both '%warn blah\n' and '%warn\n'.
3845      print STDERR "WARNING: $1\n";        # print the rest of the line
3846    } elsif ($line =~ /^%/) {
3847      print STDERR "Ignoring unknown command from profile header: $line";
3848    } else {
3849      # End of commands, must be the header line.
3850      return $line;
3851    }
3852  }
3853  return undef;     # got to EOF without seeing a header line
3854}
3855
3856sub IsSymbolizedProfileFile {
3857  my $file_name = shift;
3858  if (!(-e $file_name) || !(-r $file_name)) {
3859    return 0;
3860  }
3861  # Check if the file contains a symbol-section marker.
3862  open(TFILE, "<$file_name");
3863  binmode TFILE;
3864  my $firstline = ReadProfileHeader(*TFILE);
3865  close(TFILE);
3866  if (!$firstline) {
3867    return 0;
3868  }
3869  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3870  my $symbol_marker = $&;
3871  return $firstline =~ /^--- *$symbol_marker/;
3872}
3873
3874# Parse profile generated by common/profiler.cc and return a reference
3875# to a map:
3876#      $result->{version}     Version number of profile file
3877#      $result->{period}      Sampling period (in microseconds)
3878#      $result->{profile}     Profile object
3879#      $result->{threads}     Map of thread IDs to profile objects
3880#      $result->{map}         Memory map info from profile
3881#      $result->{pcs}         Hash of all PC values seen, key is hex address
3882sub ReadProfile {
3883  my $prog = shift;
3884  my $fname = shift;
3885  my $result;            # return value
3886
3887  $CONTENTION_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3888  my $contention_marker = $&;
3889  $GROWTH_PAGE  =~ m,[^/]+$,;    # matches everything after the last slash
3890  my $growth_marker = $&;
3891  $SYMBOL_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3892  my $symbol_marker = $&;
3893  $PROFILE_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3894  my $profile_marker = $&;
3895  $HEAP_PAGE =~ m,[^/]+$,;    # matches everything after the last slash
3896  my $heap_marker = $&;
3897
3898  # Look at first line to see if it is a heap or a CPU profile.
3899  # CPU profile may start with no header at all, and just binary data
3900  # (starting with \0\0\0\0) -- in that case, don't try to read the
3901  # whole firstline, since it may be gigabytes(!) of data.
3902  open(PROFILE, "<$fname") || error("$fname: $!\n");
3903  binmode PROFILE;      # New perls do UTF-8 processing
3904  my $header = ReadProfileHeader(*PROFILE);
3905  if (!defined($header)) {   # means "at EOF"
3906    error("Profile is empty.\n");
3907  }
3908
3909  my $symbols;
3910  if ($header =~ m/^--- *$symbol_marker/o) {
3911    # Verify that the user asked for a symbolized profile
3912    if (!$main::use_symbolized_profile) {
3913      # we have both a binary and symbolized profiles, abort
3914      error("FATAL ERROR: Symbolized profile\n   $fname\ncannot be used with " .
3915            "a binary arg. Try again without passing\n   $prog\n");
3916    }
3917    # Read the symbol section of the symbolized profile file.
3918    $symbols = ReadSymbols(*PROFILE{IO});
3919    # Read the next line to get the header for the remaining profile.
3920    $header = ReadProfileHeader(*PROFILE) || "";
3921  }
3922
3923  if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) {
3924    # Skip "--- ..." line for profile types that have their own headers.
3925    $header = ReadProfileHeader(*PROFILE) || "";
3926  }
3927
3928  $main::profile_type = '';
3929
3930  if ($header =~ m/^heap profile:.*$growth_marker/o) {
3931    $main::profile_type = 'growth';
3932    $result =  ReadHeapProfile($prog, *PROFILE, $header);
3933  } elsif ($header =~ m/^heap profile:/) {
3934    $main::profile_type = 'heap';
3935    $result =  ReadHeapProfile($prog, *PROFILE, $header);
3936  } elsif ($header =~ m/^heap/) {
3937    $main::profile_type = 'heap';
3938    $result = ReadThreadedHeapProfile($prog, $fname, $header);
3939  } elsif ($header =~ m/^--- *$contention_marker/o) {
3940    $main::profile_type = 'contention';
3941    $result = ReadSynchProfile($prog, *PROFILE);
3942  } elsif ($header =~ m/^--- *Stacks:/) {
3943    print STDERR
3944      "Old format contention profile: mistakenly reports " .
3945      "condition variable signals as lock contentions.\n";
3946    $main::profile_type = 'contention';
3947    $result = ReadSynchProfile($prog, *PROFILE);
3948  } elsif ($header =~ m/^--- *$profile_marker/) {
3949    # the binary cpu profile data starts immediately after this line
3950    $main::profile_type = 'cpu';
3951    $result = ReadCPUProfile($prog, $fname, *PROFILE);
3952  } else {
3953    if (defined($symbols)) {
3954      # a symbolized profile contains a format we don't recognize, bail out
3955      error("$fname: Cannot recognize profile section after symbols.\n");
3956    }
3957    # no ascii header present -- must be a CPU profile
3958    $main::profile_type = 'cpu';
3959    $result = ReadCPUProfile($prog, $fname, *PROFILE);
3960  }
3961
3962  close(PROFILE);
3963
3964  # if we got symbols along with the profile, return those as well
3965  if (defined($symbols)) {
3966    $result->{symbols} = $symbols;
3967  }
3968
3969  return $result;
3970}
3971
3972# Subtract one from caller pc so we map back to call instr.
3973# However, don't do this if we're reading a symbolized profile
3974# file, in which case the subtract-one was done when the file
3975# was written.
3976#
3977# We apply the same logic to all readers, though ReadCPUProfile uses an
3978# independent implementation.
3979sub FixCallerAddresses {
3980  my $stack = shift;
3981  # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile()
3982  # dumps unadjusted profiles.
3983  {
3984    $stack =~ /(\s)/;
3985    my $delimiter = $1;
3986    my @addrs = split(' ', $stack);
3987    my @fixedaddrs;
3988    $#fixedaddrs = $#addrs;
3989    if ($#addrs >= 0) {
3990      $fixedaddrs[0] = $addrs[0];
3991    }
3992    for (my $i = 1; $i <= $#addrs; $i++) {
3993      $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1");
3994    }
3995    return join $delimiter, @fixedaddrs;
3996  }
3997}
3998
3999# CPU profile reader
4000sub ReadCPUProfile {
4001  my $prog = shift;
4002  my $fname = shift;       # just used for logging
4003  local *PROFILE = shift;
4004  my $version;
4005  my $period;
4006  my $i;
4007  my $profile = {};
4008  my $pcs = {};
4009
4010  # Parse string into array of slots.
4011  my $slots = CpuProfileStream->new(*PROFILE, $fname);
4012
4013  # Read header.  The current header version is a 5-element structure
4014  # containing:
4015  #   0: header count (always 0)
4016  #   1: header "words" (after this one: 3)
4017  #   2: format version (0)
4018  #   3: sampling period (usec)
4019  #   4: unused padding (always 0)
4020  if ($slots->get(0) != 0 ) {
4021    error("$fname: not a profile file, or old format profile file\n");
4022  }
4023  $i = 2 + $slots->get(1);
4024  $version = $slots->get(2);
4025  $period = $slots->get(3);
4026  # Do some sanity checking on these header values.
4027  if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) {
4028    error("$fname: not a profile file, or corrupted profile file\n");
4029  }
4030
4031  # Parse profile
4032  while ($slots->get($i) != -1) {
4033    my $n = $slots->get($i++);
4034    my $d = $slots->get($i++);
4035    if ($d > (2**16)) {  # TODO(csilvers): what's a reasonable max-stack-depth?
4036      my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8));
4037      print STDERR "At index $i (address $addr):\n";
4038      error("$fname: stack trace depth >= 2**32\n");
4039    }
4040    if ($slots->get($i) == 0) {
4041      # End of profile data marker
4042      $i += $d;
4043      last;
4044    }
4045
4046    # Make key out of the stack entries
4047    my @k = ();
4048    for (my $j = 0; $j < $d; $j++) {
4049      my $pc = $slots->get($i+$j);
4050      # Subtract one from caller pc so we map back to call instr.
4051      $pc--;
4052      $pc = sprintf("%0*x", $address_length, $pc);
4053      $pcs->{$pc} = 1;
4054      push @k, $pc;
4055    }
4056
4057    AddEntry($profile, (join "\n", @k), $n);
4058    $i += $d;
4059  }
4060
4061  # Parse map
4062  my $map = '';
4063  seek(PROFILE, $i * 4, 0);
4064  read(PROFILE, $map, (stat PROFILE)[7]);
4065
4066  my $r = {};
4067  $r->{version} = $version;
4068  $r->{period} = $period;
4069  $r->{profile} = $profile;
4070  $r->{libs} = ParseLibraries($prog, $map, $pcs);
4071  $r->{pcs} = $pcs;
4072
4073  return $r;
4074}
4075
4076sub HeapProfileIndex {
4077  my $index = 1;
4078  if ($main::opt_inuse_space) {
4079    $index = 1;
4080  } elsif ($main::opt_inuse_objects) {
4081    $index = 0;
4082  } elsif ($main::opt_alloc_space) {
4083    $index = 3;
4084  } elsif ($main::opt_alloc_objects) {
4085    $index = 2;
4086  }
4087  return $index;
4088}
4089
4090sub ReadMappedLibraries {
4091  my $fh = shift;
4092  my $map = "";
4093  # Read the /proc/self/maps data
4094  while (<$fh>) {
4095    s/\r//g;         # turn windows-looking lines into unix-looking lines
4096    $map .= $_;
4097  }
4098  return $map;
4099}
4100
4101sub ReadMemoryMap {
4102  my $fh = shift;
4103  my $map = "";
4104  # Read /proc/self/maps data as formatted by DumpAddressMap()
4105  my $buildvar = "";
4106  while (<PROFILE>) {
4107    s/\r//g;         # turn windows-looking lines into unix-looking lines
4108    # Parse "build=<dir>" specification if supplied
4109    if (m/^\s*build=(.*)\n/) {
4110      $buildvar = $1;
4111    }
4112
4113    # Expand "$build" variable if available
4114    $_ =~ s/\$build\b/$buildvar/g;
4115
4116    $map .= $_;
4117  }
4118  return $map;
4119}
4120
4121sub AdjustSamples {
4122  my ($sample_adjustment, $sampling_algorithm, $n1, $s1, $n2, $s2) = @_;
4123  if ($sample_adjustment) {
4124    if ($sampling_algorithm == 2) {
4125      # Remote-heap version 2
4126      # The sampling frequency is the rate of a Poisson process.
4127      # This means that the probability of sampling an allocation of
4128      # size X with sampling rate Y is 1 - exp(-X/Y)
4129      if ($n1 != 0) {
4130        my $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
4131        my $scale_factor = 1/(1 - exp(-$ratio));
4132        $n1 *= $scale_factor;
4133        $s1 *= $scale_factor;
4134      }
4135      if ($n2 != 0) {
4136        my $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
4137        my $scale_factor = 1/(1 - exp(-$ratio));
4138        $n2 *= $scale_factor;
4139        $s2 *= $scale_factor;
4140      }
4141    } else {
4142      # Remote-heap version 1
4143      my $ratio;
4144      $ratio = (($s1*1.0)/$n1)/($sample_adjustment);
4145      if ($ratio < 1) {
4146        $n1 /= $ratio;
4147        $s1 /= $ratio;
4148      }
4149      $ratio = (($s2*1.0)/$n2)/($sample_adjustment);
4150      if ($ratio < 1) {
4151        $n2 /= $ratio;
4152        $s2 /= $ratio;
4153      }
4154    }
4155  }
4156  return ($n1, $s1, $n2, $s2);
4157}
4158
4159sub ReadHeapProfile {
4160  my $prog = shift;
4161  local *PROFILE = shift;
4162  my $header = shift;
4163
4164  my $index = HeapProfileIndex();
4165
4166  # Find the type of this profile.  The header line looks like:
4167  #    heap profile:   1246:  8800744 [  1246:  8800744] @ <heap-url>/266053
4168  # There are two pairs <count: size>, the first inuse objects/space, and the
4169  # second allocated objects/space.  This is followed optionally by a profile
4170  # type, and if that is present, optionally by a sampling frequency.
4171  # For remote heap profiles (v1):
4172  # The interpretation of the sampling frequency is that the profiler, for
4173  # each sample, calculates a uniformly distributed random integer less than
4174  # the given value, and records the next sample after that many bytes have
4175  # been allocated.  Therefore, the expected sample interval is half of the
4176  # given frequency.  By default, if not specified, the expected sample
4177  # interval is 128KB.  Only remote-heap-page profiles are adjusted for
4178  # sample size.
4179  # For remote heap profiles (v2):
4180  # The sampling frequency is the rate of a Poisson process. This means that
4181  # the probability of sampling an allocation of size X with sampling rate Y
4182  # is 1 - exp(-X/Y)
4183  # For version 2, a typical header line might look like this:
4184  # heap profile:   1922: 127792360 [  1922: 127792360] @ <heap-url>_v2/524288
4185  # the trailing number (524288) is the sampling rate. (Version 1 showed
4186  # double the 'rate' here)
4187  my $sampling_algorithm = 0;
4188  my $sample_adjustment = 0;
4189  chomp($header);
4190  my $type = "unknown";
4191  if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") {
4192    if (defined($6) && ($6 ne '')) {
4193      $type = $6;
4194      my $sample_period = $8;
4195      # $type is "heapprofile" for profiles generated by the
4196      # heap-profiler, and either "heap" or "heap_v2" for profiles
4197      # generated by sampling directly within tcmalloc.  It can also
4198      # be "growth" for heap-growth profiles.  The first is typically
4199      # found for profiles generated locally, and the others for
4200      # remote profiles.
4201      if (($type eq "heapprofile") || ($type !~ /heap/) ) {
4202        # No need to adjust for the sampling rate with heap-profiler-derived data
4203        $sampling_algorithm = 0;
4204      } elsif ($type =~ /_v2/) {
4205        $sampling_algorithm = 2;     # version 2 sampling
4206        if (defined($sample_period) && ($sample_period ne '')) {
4207          $sample_adjustment = int($sample_period);
4208        }
4209      } else {
4210        $sampling_algorithm = 1;     # version 1 sampling
4211        if (defined($sample_period) && ($sample_period ne '')) {
4212          $sample_adjustment = int($sample_period)/2;
4213        }
4214      }
4215    } else {
4216      # We detect whether or not this is a remote-heap profile by checking
4217      # that the total-allocated stats ($n2,$s2) are exactly the
4218      # same as the in-use stats ($n1,$s1).  It is remotely conceivable
4219      # that a non-remote-heap profile may pass this check, but it is hard
4220      # to imagine how that could happen.
4221      # In this case it's so old it's guaranteed to be remote-heap version 1.
4222      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
4223      if (($n1 == $n2) && ($s1 == $s2)) {
4224        # This is likely to be a remote-heap based sample profile
4225        $sampling_algorithm = 1;
4226      }
4227    }
4228  }
4229
4230  if ($sampling_algorithm > 0) {
4231    # For remote-heap generated profiles, adjust the counts and sizes to
4232    # account for the sample rate (we sample once every 128KB by default).
4233    if ($sample_adjustment == 0) {
4234      # Turn on profile adjustment.
4235      $sample_adjustment = 128*1024;
4236      print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n";
4237    } else {
4238      printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n",
4239                     $sample_adjustment);
4240    }
4241    if ($sampling_algorithm > 1) {
4242      # We don't bother printing anything for the original version (version 1)
4243      printf STDERR "Heap version $sampling_algorithm\n";
4244    }
4245  }
4246
4247  my $profile = {};
4248  my $pcs = {};
4249  my $map = "";
4250
4251  while (<PROFILE>) {
4252    s/\r//g;         # turn windows-looking lines into unix-looking lines
4253    if (/^MAPPED_LIBRARIES:/) {
4254      $map .= ReadMappedLibraries(*PROFILE);
4255      last;
4256    }
4257
4258    if (/^--- Memory map:/) {
4259      $map .= ReadMemoryMap(*PROFILE);
4260      last;
4261    }
4262
4263    # Read entry of the form:
4264    #  <count1>: <bytes1> [<count2>: <bytes2>] @ a1 a2 a3 ... an
4265    s/^\s*//;
4266    s/\s*$//;
4267    if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) {
4268      my $stack = $5;
4269      my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4);
4270      my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm,
4271                                 $n1, $s1, $n2, $s2);
4272      AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
4273    }
4274  }
4275
4276  my $r = {};
4277  $r->{version} = "heap";
4278  $r->{period} = 1;
4279  $r->{profile} = $profile;
4280  $r->{libs} = ParseLibraries($prog, $map, $pcs);
4281  $r->{pcs} = $pcs;
4282  return $r;
4283}
4284
4285sub ReadThreadedHeapProfile {
4286  my ($prog, $fname, $header) = @_;
4287
4288  my $index = HeapProfileIndex();
4289  my $sampling_algorithm = 0;
4290  my $sample_adjustment = 0;
4291  chomp($header);
4292  my $type = "unknown";
4293  # Assuming a very specific type of header for now.
4294  if ($header =~ m"^heap_v2/(\d+)") {
4295    $type = "_v2";
4296    $sampling_algorithm = 2;
4297    $sample_adjustment = int($1);
4298  }
4299  if ($type ne "_v2" || !defined($sample_adjustment)) {
4300    die "Threaded heap profiles require v2 sampling with a sample rate\n";
4301  }
4302
4303  my $profile = {};
4304  my $thread_profiles = {};
4305  my $pcs = {};
4306  my $map = "";
4307  my $stack = "";
4308
4309  while (<PROFILE>) {
4310    s/\r//g;
4311    if (/^MAPPED_LIBRARIES:/) {
4312      $map .= ReadMappedLibraries(*PROFILE);
4313      last;
4314    }
4315
4316    if (/^--- Memory map:/) {
4317      $map .= ReadMemoryMap(*PROFILE);
4318      last;
4319    }
4320
4321    # Read entry of the form:
4322    # @ a1 a2 ... an
4323    #   t*: <count1>: <bytes1> [<count2>: <bytes2>]
4324    #   t1: <count1>: <bytes1> [<count2>: <bytes2>]
4325    #     ...
4326    #   tn: <count1>: <bytes1> [<count2>: <bytes2>]
4327    s/^\s*//;
4328    s/\s*$//;
4329    if (m/^@\s+(.*)$/) {
4330      $stack = $1;
4331    } elsif (m/^\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$/) {
4332      if ($stack eq "") {
4333        # Still in the header, so this is just a per-thread summary.
4334        next;
4335      }
4336      my $thread = $2;
4337      my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6);
4338      my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm,
4339                                 $n1, $s1, $n2, $s2);
4340      if ($thread eq "*") {
4341        AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]);
4342      } else {
4343        if (!exists($thread_profiles->{$thread})) {
4344          $thread_profiles->{$thread} = {};
4345        }
4346        AddEntries($thread_profiles->{$thread}, $pcs,
4347                   FixCallerAddresses($stack), $counts[$index]);
4348      }
4349    }
4350  }
4351
4352  my $r = {};
4353  $r->{version} = "heap";
4354  $r->{period} = 1;
4355  $r->{profile} = $profile;
4356  $r->{threads} = $thread_profiles;
4357  $r->{libs} = ParseLibraries($prog, $map, $pcs);
4358  $r->{pcs} = $pcs;
4359  return $r;
4360}
4361
4362sub ReadSynchProfile {
4363  my $prog = shift;
4364  local *PROFILE = shift;
4365  my $header = shift;
4366
4367  my $map = '';
4368  my $profile = {};
4369  my $pcs = {};
4370  my $sampling_period = 1;
4371  my $cyclespernanosec = 2.8;   # Default assumption for old binaries
4372  my $seen_clockrate = 0;
4373  my $line;
4374
4375  my $index = 0;
4376  if ($main::opt_total_delay) {
4377    $index = 0;
4378  } elsif ($main::opt_contentions) {
4379    $index = 1;
4380  } elsif ($main::opt_mean_delay) {
4381    $index = 2;
4382  }
4383
4384  while ( $line = <PROFILE> ) {
4385    $line =~ s/\r//g;      # turn windows-looking lines into unix-looking lines
4386    if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) {
4387      my ($cycles, $count, $stack) = ($1, $2, $3);
4388
4389      # Convert cycles to nanoseconds
4390      $cycles /= $cyclespernanosec;
4391
4392      # Adjust for sampling done by application
4393      $cycles *= $sampling_period;
4394      $count *= $sampling_period;
4395
4396      my @values = ($cycles, $count, $cycles / $count);
4397      AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]);
4398
4399    } elsif ( $line =~ /^(slow release).*thread \d+  \@\s*(.*?)\s*$/ ||
4400              $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) {
4401      my ($cycles, $stack) = ($1, $2);
4402      if ($cycles !~ /^\d+$/) {
4403        next;
4404      }
4405
4406      # Convert cycles to nanoseconds
4407      $cycles /= $cyclespernanosec;
4408
4409      # Adjust for sampling done by application
4410      $cycles *= $sampling_period;
4411
4412      AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles);
4413
4414    } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) {
4415      my ($variable, $value) = ($1,$2);
4416      for ($variable, $value) {
4417        s/^\s+//;
4418        s/\s+$//;
4419      }
4420      if ($variable eq "cycles/second") {
4421        $cyclespernanosec = $value / 1e9;
4422        $seen_clockrate = 1;
4423      } elsif ($variable eq "sampling period") {
4424        $sampling_period = $value;
4425      } elsif ($variable eq "ms since reset") {
4426        # Currently nothing is done with this value in jeprof
4427        # So we just silently ignore it for now
4428      } elsif ($variable eq "discarded samples") {
4429        # Currently nothing is done with this value in jeprof
4430        # So we just silently ignore it for now
4431      } else {
4432        printf STDERR ("Ignoring unnknown variable in /contention output: " .
4433                       "'%s' = '%s'\n",$variable,$value);
4434      }
4435    } else {
4436      # Memory map entry
4437      $map .= $line;
4438    }
4439  }
4440
4441  if (!$seen_clockrate) {
4442    printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n",
4443                   $cyclespernanosec);
4444  }
4445
4446  my $r = {};
4447  $r->{version} = 0;
4448  $r->{period} = $sampling_period;
4449  $r->{profile} = $profile;
4450  $r->{libs} = ParseLibraries($prog, $map, $pcs);
4451  $r->{pcs} = $pcs;
4452  return $r;
4453}
4454
4455# Given a hex value in the form "0x1abcd" or "1abcd", return either
4456# "0001abcd" or "000000000001abcd", depending on the current (global)
4457# address length.
4458sub HexExtend {
4459  my $addr = shift;
4460
4461  $addr =~ s/^(0x)?0*//;
4462  my $zeros_needed = $address_length - length($addr);
4463  if ($zeros_needed < 0) {
4464    printf STDERR "Warning: address $addr is longer than address length $address_length\n";
4465    return $addr;
4466  }
4467  return ("0" x $zeros_needed) . $addr;
4468}
4469
4470##### Symbol extraction #####
4471
4472# Aggressively search the lib_prefix values for the given library
4473# If all else fails, just return the name of the library unmodified.
4474# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so"
4475# it will search the following locations in this order, until it finds a file:
4476#   /my/path/lib/dir/mylib.so
4477#   /other/path/lib/dir/mylib.so
4478#   /my/path/dir/mylib.so
4479#   /other/path/dir/mylib.so
4480#   /my/path/mylib.so
4481#   /other/path/mylib.so
4482#   /lib/dir/mylib.so              (returned as last resort)
4483sub FindLibrary {
4484  my $file = shift;
4485  my $suffix = $file;
4486
4487  # Search for the library as described above
4488  do {
4489    foreach my $prefix (@prefix_list) {
4490      my $fullpath = $prefix . $suffix;
4491      if (-e $fullpath) {
4492        return $fullpath;
4493      }
4494    }
4495  } while ($suffix =~ s|^/[^/]+/|/|);
4496  return $file;
4497}
4498
4499# Return path to library with debugging symbols.
4500# For libc libraries, the copy in /usr/lib/debug contains debugging symbols
4501sub DebuggingLibrary {
4502  my $file = shift;
4503
4504  if ($file !~ m|^/|) {
4505    return undef;
4506  }
4507
4508  # Find debug symbol file if it's named after the library's name.
4509
4510  if (-f "/usr/lib/debug$file") {
4511    if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; }
4512    return "/usr/lib/debug$file";
4513  } elsif (-f "/usr/lib/debug$file.debug") {
4514    if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; }
4515    return "/usr/lib/debug$file.debug";
4516  }
4517
4518  if(!$main::opt_debug_syms_by_id) {
4519    if($main::opt_debug) { print STDERR "no debug symbols found for $file\n" };
4520    return undef;
4521  }
4522
4523  # Find debug file if it's named after the library's build ID.
4524
4525  my $readelf = '';
4526  if (!$main::gave_up_on_elfutils) {
4527    $readelf = qx/eu-readelf -n ${file}/;
4528    if ($?) {
4529      print STDERR "Cannot run eu-readelf. To use --debug-syms-by-id you must be on Linux, with elfutils installed.\n";
4530      $main::gave_up_on_elfutils = 1;
4531      return undef;
4532    }
4533    my $buildID = $1 if $readelf =~ /Build ID: ([A-Fa-f0-9]+)/s;
4534    if (defined $buildID && length $buildID > 0) {
4535      my $symbolFile = '/usr/lib/debug/.build-id/' . substr($buildID, 0, 2) . '/' . substr($buildID, 2) . '.debug';
4536      if (-e $symbolFile) {
4537        if($main::opt_debug) { print STDERR "found debug symbol file $symbolFile for $file\n" };
4538        return $symbolFile;
4539      } else {
4540        if($main::opt_debug) { print STDERR "no debug symbol file found for $file, build ID: $buildID\n" };
4541        return undef;
4542      }
4543    }
4544  }
4545
4546  if($main::opt_debug) { print STDERR "no debug symbols found for $file, build ID unknown\n" };
4547  return undef;
4548}
4549
4550
4551# Parse text section header of a library using objdump
4552sub ParseTextSectionHeaderFromObjdump {
4553  my $lib = shift;
4554
4555  my $size = undef;
4556  my $vma;
4557  my $file_offset;
4558  # Get objdump output from the library file to figure out how to
4559  # map between mapped addresses and addresses in the library.
4560  my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib);
4561  open(OBJDUMP, "$cmd |") || error("$cmd: $!\n");
4562  while (<OBJDUMP>) {
4563    s/\r//g;         # turn windows-looking lines into unix-looking lines
4564    # Idx Name          Size      VMA       LMA       File off  Algn
4565    #  10 .text         00104b2c  420156f0  420156f0  000156f0  2**4
4566    # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file
4567    # offset may still be 8.  But AddressSub below will still handle that.
4568    my @x = split;
4569    if (($#x >= 6) && ($x[1] eq '.text')) {
4570      $size = $x[2];
4571      $vma = $x[3];
4572      $file_offset = $x[5];
4573      last;
4574    }
4575  }
4576  close(OBJDUMP);
4577
4578  if (!defined($size)) {
4579    return undef;
4580  }
4581
4582  my $r = {};
4583  $r->{size} = $size;
4584  $r->{vma} = $vma;
4585  $r->{file_offset} = $file_offset;
4586
4587  return $r;
4588}
4589
4590# Parse text section header of a library using otool (on OS X)
4591sub ParseTextSectionHeaderFromOtool {
4592  my $lib = shift;
4593
4594  my $size = undef;
4595  my $vma = undef;
4596  my $file_offset = undef;
4597  # Get otool output from the library file to figure out how to
4598  # map between mapped addresses and addresses in the library.
4599  my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib);
4600  open(OTOOL, "$command |") || error("$command: $!\n");
4601  my $cmd = "";
4602  my $sectname = "";
4603  my $segname = "";
4604  foreach my $line (<OTOOL>) {
4605    $line =~ s/\r//g;      # turn windows-looking lines into unix-looking lines
4606    # Load command <#>
4607    #       cmd LC_SEGMENT
4608    # [...]
4609    # Section
4610    #   sectname __text
4611    #    segname __TEXT
4612    #       addr 0x000009f8
4613    #       size 0x00018b9e
4614    #     offset 2552
4615    #      align 2^2 (4)
4616    # We will need to strip off the leading 0x from the hex addresses,
4617    # and convert the offset into hex.
4618    if ($line =~ /Load command/) {
4619      $cmd = "";
4620      $sectname = "";
4621      $segname = "";
4622    } elsif ($line =~ /Section/) {
4623      $sectname = "";
4624      $segname = "";
4625    } elsif ($line =~ /cmd (\w+)/) {
4626      $cmd = $1;
4627    } elsif ($line =~ /sectname (\w+)/) {
4628      $sectname = $1;
4629    } elsif ($line =~ /segname (\w+)/) {
4630      $segname = $1;
4631    } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") &&
4632               $sectname eq "__text" &&
4633               $segname eq "__TEXT")) {
4634      next;
4635    } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) {
4636      $vma = $1;
4637    } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) {
4638      $size = $1;
4639    } elsif ($line =~ /\boffset ([0-9]+)/) {
4640      $file_offset = sprintf("%016x", $1);
4641    }
4642    if (defined($vma) && defined($size) && defined($file_offset)) {
4643      last;
4644    }
4645  }
4646  close(OTOOL);
4647
4648  if (!defined($vma) || !defined($size) || !defined($file_offset)) {
4649     return undef;
4650  }
4651
4652  my $r = {};
4653  $r->{size} = $size;
4654  $r->{vma} = $vma;
4655  $r->{file_offset} = $file_offset;
4656
4657  return $r;
4658}
4659
4660sub ParseTextSectionHeader {
4661  # obj_tool_map("otool") is only defined if we're in a Mach-O environment
4662  if (defined($obj_tool_map{"otool"})) {
4663    my $r = ParseTextSectionHeaderFromOtool(@_);
4664    if (defined($r)){
4665      return $r;
4666    }
4667  }
4668  # If otool doesn't work, or we don't have it, fall back to objdump
4669  return ParseTextSectionHeaderFromObjdump(@_);
4670}
4671
4672# Split /proc/pid/maps dump into a list of libraries
4673sub ParseLibraries {
4674  return if $main::use_symbol_page;  # We don't need libraries info.
4675  my $prog = Cwd::abs_path(shift);
4676  my $map = shift;
4677  my $pcs = shift;
4678
4679  my $result = [];
4680  my $h = "[a-f0-9]+";
4681  my $zero_offset = HexExtend("0");
4682
4683  my $buildvar = "";
4684  foreach my $l (split("\n", $map)) {
4685    if ($l =~ m/^\s*build=(.*)$/) {
4686      $buildvar = $1;
4687    }
4688
4689    my $start;
4690    my $finish;
4691    my $offset;
4692    my $lib;
4693    if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) {
4694      # Full line from /proc/self/maps.  Example:
4695      #   40000000-40015000 r-xp 00000000 03:01 12845071   /lib/ld-2.3.2.so
4696      $start = HexExtend($1);
4697      $finish = HexExtend($2);
4698      $offset = HexExtend($3);
4699      $lib = $4;
4700      $lib =~ s|\\|/|g;     # turn windows-style paths into unix-style paths
4701    } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) {
4702      # Cooked line from DumpAddressMap.  Example:
4703      #   40000000-40015000: /lib/ld-2.3.2.so
4704      $start = HexExtend($1);
4705      $finish = HexExtend($2);
4706      $offset = $zero_offset;
4707      $lib = $3;
4708    } elsif (($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+)$/i) && ($4 eq $prog)) {
4709      # PIEs and address space randomization do not play well with our
4710      # default assumption that main executable is at lowest
4711      # addresses. So we're detecting main executable in
4712      # /proc/self/maps as well.
4713      $start = HexExtend($1);
4714      $finish = HexExtend($2);
4715      $offset = HexExtend($3);
4716      $lib = $4;
4717      $lib =~ s|\\|/|g;     # turn windows-style paths into unix-style paths
4718    }
4719    # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in
4720    # function procfs_doprocmap (sys/fs/procfs/procfs_map.c)
4721    #
4722    # Example:
4723    # 0x800600000 0x80061a000 26 0 0xfffff800035a0000 r-x 75 33 0x1004 COW NC vnode /libexec/ld-elf.s
4724    # o.1 NCH -1
4725    elsif ($l =~ /^(0x$h)\s(0x$h)\s\d+\s\d+\s0x$h\sr-x\s\d+\s\d+\s0x\d+\s(COW|NCO)\s(NC|NNC)\svnode\s(\S+\.so(\.\d+)*)/) {
4726      $start = HexExtend($1);
4727      $finish = HexExtend($2);
4728      $offset = $zero_offset;
4729      $lib = FindLibrary($5);
4730
4731    } else {
4732      next;
4733    }
4734
4735    # Expand "$build" variable if available
4736    $lib =~ s/\$build\b/$buildvar/g;
4737
4738    $lib = FindLibrary($lib);
4739
4740    # Check for pre-relocated libraries, which use pre-relocated symbol tables
4741    # and thus require adjusting the offset that we'll use to translate
4742    # VM addresses into symbol table addresses.
4743    # Only do this if we're not going to fetch the symbol table from a
4744    # debugging copy of the library.
4745    if (!DebuggingLibrary($lib)) {
4746      my $text = ParseTextSectionHeader($lib);
4747      if (defined($text)) {
4748         my $vma_offset = AddressSub($text->{vma}, $text->{file_offset});
4749         $offset = AddressAdd($offset, $vma_offset);
4750      }
4751    }
4752
4753    if($main::opt_debug) { printf STDERR "$start:$finish ($offset) $lib\n"; }
4754    push(@{$result}, [$lib, $start, $finish, $offset]);
4755  }
4756
4757  # Append special entry for additional library (not relocated)
4758  if ($main::opt_lib ne "") {
4759    my $text = ParseTextSectionHeader($main::opt_lib);
4760    if (defined($text)) {
4761       my $start = $text->{vma};
4762       my $finish = AddressAdd($start, $text->{size});
4763
4764       push(@{$result}, [$main::opt_lib, $start, $finish, $start]);
4765    }
4766  }
4767
4768  # Append special entry for the main program.  This covers
4769  # 0..max_pc_value_seen, so that we assume pc values not found in one
4770  # of the library ranges will be treated as coming from the main
4771  # program binary.
4772  my $min_pc = HexExtend("0");
4773  my $max_pc = $min_pc;          # find the maximal PC value in any sample
4774  foreach my $pc (keys(%{$pcs})) {
4775    if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); }
4776  }
4777  push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]);
4778
4779  return $result;
4780}
4781
4782# Add two hex addresses of length $address_length.
4783# Run jeprof --test for unit test if this is changed.
4784sub AddressAdd {
4785  my $addr1 = shift;
4786  my $addr2 = shift;
4787  my $sum;
4788
4789  if ($address_length == 8) {
4790    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
4791    $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16);
4792    return sprintf("%08x", $sum);
4793
4794  } else {
4795    # Do the addition in 7-nibble chunks to trivialize carry handling.
4796
4797    if ($main::opt_debug and $main::opt_test) {
4798      print STDERR "AddressAdd $addr1 + $addr2 = ";
4799    }
4800
4801    my $a1 = substr($addr1,-7);
4802    $addr1 = substr($addr1,0,-7);
4803    my $a2 = substr($addr2,-7);
4804    $addr2 = substr($addr2,0,-7);
4805    $sum = hex($a1) + hex($a2);
4806    my $c = 0;
4807    if ($sum > 0xfffffff) {
4808      $c = 1;
4809      $sum -= 0x10000000;
4810    }
4811    my $r = sprintf("%07x", $sum);
4812
4813    $a1 = substr($addr1,-7);
4814    $addr1 = substr($addr1,0,-7);
4815    $a2 = substr($addr2,-7);
4816    $addr2 = substr($addr2,0,-7);
4817    $sum = hex($a1) + hex($a2) + $c;
4818    $c = 0;
4819    if ($sum > 0xfffffff) {
4820      $c = 1;
4821      $sum -= 0x10000000;
4822    }
4823    $r = sprintf("%07x", $sum) . $r;
4824
4825    $sum = hex($addr1) + hex($addr2) + $c;
4826    if ($sum > 0xff) { $sum -= 0x100; }
4827    $r = sprintf("%02x", $sum) . $r;
4828
4829    if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; }
4830
4831    return $r;
4832  }
4833}
4834
4835
4836# Subtract two hex addresses of length $address_length.
4837# Run jeprof --test for unit test if this is changed.
4838sub AddressSub {
4839  my $addr1 = shift;
4840  my $addr2 = shift;
4841  my $diff;
4842
4843  if ($address_length == 8) {
4844    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
4845    $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16);
4846    return sprintf("%08x", $diff);
4847
4848  } else {
4849    # Do the addition in 7-nibble chunks to trivialize borrow handling.
4850    # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; }
4851
4852    my $a1 = hex(substr($addr1,-7));
4853    $addr1 = substr($addr1,0,-7);
4854    my $a2 = hex(substr($addr2,-7));
4855    $addr2 = substr($addr2,0,-7);
4856    my $b = 0;
4857    if ($a2 > $a1) {
4858      $b = 1;
4859      $a1 += 0x10000000;
4860    }
4861    $diff = $a1 - $a2;
4862    my $r = sprintf("%07x", $diff);
4863
4864    $a1 = hex(substr($addr1,-7));
4865    $addr1 = substr($addr1,0,-7);
4866    $a2 = hex(substr($addr2,-7)) + $b;
4867    $addr2 = substr($addr2,0,-7);
4868    $b = 0;
4869    if ($a2 > $a1) {
4870      $b = 1;
4871      $a1 += 0x10000000;
4872    }
4873    $diff = $a1 - $a2;
4874    $r = sprintf("%07x", $diff) . $r;
4875
4876    $a1 = hex($addr1);
4877    $a2 = hex($addr2) + $b;
4878    if ($a2 > $a1) { $a1 += 0x100; }
4879    $diff = $a1 - $a2;
4880    $r = sprintf("%02x", $diff) . $r;
4881
4882    # if ($main::opt_debug) { print STDERR "$r\n"; }
4883
4884    return $r;
4885  }
4886}
4887
4888# Increment a hex addresses of length $address_length.
4889# Run jeprof --test for unit test if this is changed.
4890sub AddressInc {
4891  my $addr = shift;
4892  my $sum;
4893
4894  if ($address_length == 8) {
4895    # Perl doesn't cope with wraparound arithmetic, so do it explicitly:
4896    $sum = (hex($addr)+1) % (0x10000000 * 16);
4897    return sprintf("%08x", $sum);
4898
4899  } else {
4900    # Do the addition in 7-nibble chunks to trivialize carry handling.
4901    # We are always doing this to step through the addresses in a function,
4902    # and will almost never overflow the first chunk, so we check for this
4903    # case and exit early.
4904
4905    # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; }
4906
4907    my $a1 = substr($addr,-7);
4908    $addr = substr($addr,0,-7);
4909    $sum = hex($a1) + 1;
4910    my $r = sprintf("%07x", $sum);
4911    if ($sum <= 0xfffffff) {
4912      $r = $addr . $r;
4913      # if ($main::opt_debug) { print STDERR "$r\n"; }
4914      return HexExtend($r);
4915    } else {
4916      $r = "0000000";
4917    }
4918
4919    $a1 = substr($addr,-7);
4920    $addr = substr($addr,0,-7);
4921    $sum = hex($a1) + 1;
4922    $r = sprintf("%07x", $sum) . $r;
4923    if ($sum <= 0xfffffff) {
4924      $r = $addr . $r;
4925      # if ($main::opt_debug) { print STDERR "$r\n"; }
4926      return HexExtend($r);
4927    } else {
4928      $r = "00000000000000";
4929    }
4930
4931    $sum = hex($addr) + 1;
4932    if ($sum > 0xff) { $sum -= 0x100; }
4933    $r = sprintf("%02x", $sum) . $r;
4934
4935    # if ($main::opt_debug) { print STDERR "$r\n"; }
4936    return $r;
4937  }
4938}
4939
4940# Extract symbols for all PC values found in profile
4941sub ExtractSymbols {
4942  my $libs = shift;
4943  my $pcset = shift;
4944
4945  my $symbols = {};
4946
4947  # Map each PC value to the containing library.  To make this faster,
4948  # we sort libraries by their starting pc value (highest first), and
4949  # advance through the libraries as we advance the pc.  Sometimes the
4950  # addresses of libraries may overlap with the addresses of the main
4951  # binary, so to make sure the libraries 'win', we iterate over the
4952  # libraries in reverse order (which assumes the binary doesn't start
4953  # in the middle of a library, which seems a fair assumption).
4954  my @pcs = (sort { $a cmp $b } keys(%{$pcset}));  # pcset is 0-extended strings
4955  foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) {
4956    my $libname = $lib->[0];
4957    my $start = $lib->[1];
4958    my $finish = $lib->[2];
4959    my $offset = $lib->[3];
4960
4961    # Use debug library if it exists
4962    my $debug_libname = DebuggingLibrary($libname);
4963    if ($debug_libname) {
4964        $libname = $debug_libname;
4965    }
4966
4967    # Get list of pcs that belong in this library.
4968    my $contained = [];
4969    my ($start_pc_index, $finish_pc_index);
4970    # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index].
4971    for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0;
4972         $finish_pc_index--) {
4973      last if $pcs[$finish_pc_index - 1] le $finish;
4974    }
4975    # Find smallest start_pc_index such that $start <= $pc[$start_pc_index].
4976    for ($start_pc_index = $finish_pc_index; $start_pc_index > 0;
4977         $start_pc_index--) {
4978      last if $pcs[$start_pc_index - 1] lt $start;
4979    }
4980    # This keeps PC values higher than $pc[$finish_pc_index] in @pcs,
4981    # in case there are overlaps in libraries and the main binary.
4982    @{$contained} = splice(@pcs, $start_pc_index,
4983                           $finish_pc_index - $start_pc_index);
4984    # Map to symbols
4985    MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols);
4986  }
4987
4988  return $symbols;
4989}
4990
4991# Map list of PC values to symbols for a given image
4992sub MapToSymbols {
4993  my $image = shift;
4994  my $offset = shift;
4995  my $pclist = shift;
4996  my $symbols = shift;
4997
4998  my $debug = 0;
4999
5000  # Ignore empty binaries
5001  if ($#{$pclist} < 0) { return; }
5002
5003  # Figure out the addr2line command to use
5004  my $addr2line = $obj_tool_map{"addr2line"};
5005  my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image);
5006  if (exists $obj_tool_map{"addr2line_pdb"}) {
5007    $addr2line = $obj_tool_map{"addr2line_pdb"};
5008    $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image);
5009  }
5010
5011  # If "addr2line" isn't installed on the system at all, just use
5012  # nm to get what info we can (function names, but not line numbers).
5013  if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) {
5014    MapSymbolsWithNM($image, $offset, $pclist, $symbols);
5015    return;
5016  }
5017
5018  # "addr2line -i" can produce a variable number of lines per input
5019  # address, with no separator that allows us to tell when data for
5020  # the next address starts.  So we find the address for a special
5021  # symbol (_fini) and interleave this address between all real
5022  # addresses passed to addr2line.  The name of this special symbol
5023  # can then be used as a separator.
5024  $sep_address = undef;  # May be filled in by MapSymbolsWithNM()
5025  my $nm_symbols = {};
5026  MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols);
5027  if (defined($sep_address)) {
5028    # Only add " -i" to addr2line if the binary supports it.
5029    # addr2line --help returns 0, but not if it sees an unknown flag first.
5030    if (system("$cmd -i --help >$dev_null 2>&1") == 0) {
5031      $cmd .= " -i";
5032    } else {
5033      $sep_address = undef;   # no need for sep_address if we don't support -i
5034    }
5035  }
5036
5037  # Make file with all PC values with intervening 'sep_address' so
5038  # that we can reliably detect the end of inlined function list
5039  open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n");
5040  if ($debug) { print("---- $image ---\n"); }
5041  for (my $i = 0; $i <= $#{$pclist}; $i++) {
5042    # addr2line always reads hex addresses, and does not need '0x' prefix.
5043    if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); }
5044    printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset));
5045    if (defined($sep_address)) {
5046      printf ADDRESSES ("%s\n", $sep_address);
5047    }
5048  }
5049  close(ADDRESSES);
5050  if ($debug) {
5051    print("----\n");
5052    system("cat", $main::tmpfile_sym);
5053    print("----\n");
5054    system("$cmd < " . ShellEscape($main::tmpfile_sym));
5055    print("----\n");
5056  }
5057
5058  open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |")
5059      || error("$cmd: $!\n");
5060  my $count = 0;   # Index in pclist
5061  while (<SYMBOLS>) {
5062    # Read fullfunction and filelineinfo from next pair of lines
5063    s/\r?\n$//g;
5064    my $fullfunction = $_;
5065    $_ = <SYMBOLS>;
5066    s/\r?\n$//g;
5067    my $filelinenum = $_;
5068
5069    if (defined($sep_address) && $fullfunction eq $sep_symbol) {
5070      # Terminating marker for data for this address
5071      $count++;
5072      next;
5073    }
5074
5075    $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths
5076
5077    my $pcstr = $pclist->[$count];
5078    my $function = ShortFunctionName($fullfunction);
5079    my $nms = $nm_symbols->{$pcstr};
5080    if (defined($nms)) {
5081      if ($fullfunction eq '??') {
5082        # nm found a symbol for us.
5083        $function = $nms->[0];
5084        $fullfunction = $nms->[2];
5085      } else {
5086	# MapSymbolsWithNM tags each routine with its starting address,
5087	# useful in case the image has multiple occurrences of this
5088	# routine.  (It uses a syntax that resembles template parameters,
5089	# that are automatically stripped out by ShortFunctionName().)
5090	# addr2line does not provide the same information.  So we check
5091	# if nm disambiguated our symbol, and if so take the annotated
5092	# (nm) version of the routine-name.  TODO(csilvers): this won't
5093	# catch overloaded, inlined symbols, which nm doesn't see.
5094	# Better would be to do a check similar to nm's, in this fn.
5095	if ($nms->[2] =~ m/^\Q$function\E/) {  # sanity check it's the right fn
5096	  $function = $nms->[0];
5097	  $fullfunction = $nms->[2];
5098	}
5099      }
5100    }
5101
5102    # Prepend to accumulated symbols for pcstr
5103    # (so that caller comes before callee)
5104    my $sym = $symbols->{$pcstr};
5105    if (!defined($sym)) {
5106      $sym = [];
5107      $symbols->{$pcstr} = $sym;
5108    }
5109    unshift(@{$sym}, $function, $filelinenum, $fullfunction);
5110    if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); }
5111    if (!defined($sep_address)) {
5112      # Inlining is off, so this entry ends immediately
5113      $count++;
5114    }
5115  }
5116  close(SYMBOLS);
5117}
5118
5119# Use nm to map the list of referenced PCs to symbols.  Return true iff we
5120# are able to read procedure information via nm.
5121sub MapSymbolsWithNM {
5122  my $image = shift;
5123  my $offset = shift;
5124  my $pclist = shift;
5125  my $symbols = shift;
5126
5127  # Get nm output sorted by increasing address
5128  my $symbol_table = GetProcedureBoundaries($image, ".");
5129  if (!%{$symbol_table}) {
5130    return 0;
5131  }
5132  # Start addresses are already the right length (8 or 16 hex digits).
5133  my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] }
5134    keys(%{$symbol_table});
5135
5136  if ($#names < 0) {
5137    # No symbols: just use addresses
5138    foreach my $pc (@{$pclist}) {
5139      my $pcstr = "0x" . $pc;
5140      $symbols->{$pc} = [$pcstr, "?", $pcstr];
5141    }
5142    return 0;
5143  }
5144
5145  # Sort addresses so we can do a join against nm output
5146  my $index = 0;
5147  my $fullname = $names[0];
5148  my $name = ShortFunctionName($fullname);
5149  foreach my $pc (sort { $a cmp $b } @{$pclist}) {
5150    # Adjust for mapped offset
5151    my $mpc = AddressSub($pc, $offset);
5152    while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){
5153      $index++;
5154      $fullname = $names[$index];
5155      $name = ShortFunctionName($fullname);
5156    }
5157    if ($mpc lt $symbol_table->{$fullname}->[1]) {
5158      $symbols->{$pc} = [$name, "?", $fullname];
5159    } else {
5160      my $pcstr = "0x" . $pc;
5161      $symbols->{$pc} = [$pcstr, "?", $pcstr];
5162    }
5163  }
5164  return 1;
5165}
5166
5167sub ShortFunctionName {
5168  my $function = shift;
5169  while ($function =~ s/\([^()]*\)(\s*const)?//g) { }   # Argument types
5170  while ($function =~ s/<[^<>]*>//g)  { }    # Remove template arguments
5171  $function =~ s/^.*\s+(\w+::)/$1/;          # Remove leading type
5172  return $function;
5173}
5174
5175# Trim overly long symbols found in disassembler output
5176sub CleanDisassembly {
5177  my $d = shift;
5178  while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax)
5179  while ($d =~ s/(\w+)<[^<>]*>/$1/g)  { }       # Remove template arguments
5180  return $d;
5181}
5182
5183# Clean file name for display
5184sub CleanFileName {
5185  my ($f) = @_;
5186  $f =~ s|^/proc/self/cwd/||;
5187  $f =~ s|^\./||;
5188  return $f;
5189}
5190
5191# Make address relative to section and clean up for display
5192sub UnparseAddress {
5193  my ($offset, $address) = @_;
5194  $address = AddressSub($address, $offset);
5195  $address =~ s/^0x//;
5196  $address =~ s/^0*//;
5197  return $address;
5198}
5199
5200##### Miscellaneous #####
5201
5202# Find the right versions of the above object tools to use.  The
5203# argument is the program file being analyzed, and should be an ELF
5204# 32-bit or ELF 64-bit executable file.  The location of the tools
5205# is determined by considering the following options in this order:
5206#   1) --tools option, if set
5207#   2) JEPROF_TOOLS environment variable, if set
5208#   3) the environment
5209sub ConfigureObjTools {
5210  my $prog_file = shift;
5211
5212  # Check for the existence of $prog_file because /usr/bin/file does not
5213  # predictably return error status in prod.
5214  (-e $prog_file)  || error("$prog_file does not exist.\n");
5215
5216  my $file_type = undef;
5217  if (-e "/usr/bin/file") {
5218    # Follow symlinks (at least for systems where "file" supports that).
5219    my $escaped_prog_file = ShellEscape($prog_file);
5220    $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null ||
5221                  /usr/bin/file $escaped_prog_file`;
5222  } elsif ($^O == "MSWin32") {
5223    $file_type = "MS Windows";
5224  } else {
5225    print STDERR "WARNING: Can't determine the file type of $prog_file";
5226  }
5227
5228  if ($file_type =~ /64-bit/) {
5229    # Change $address_length to 16 if the program file is ELF 64-bit.
5230    # We can't detect this from many (most?) heap or lock contention
5231    # profiles, since the actual addresses referenced are generally in low
5232    # memory even for 64-bit programs.
5233    $address_length = 16;
5234  }
5235
5236  if ($file_type =~ /MS Windows/) {
5237    # For windows, we provide a version of nm and addr2line as part of
5238    # the opensource release, which is capable of parsing
5239    # Windows-style PDB executables.  It should live in the path, or
5240    # in the same directory as jeprof.
5241    $obj_tool_map{"nm_pdb"} = "nm-pdb";
5242    $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb";
5243  }
5244
5245  if ($file_type =~ /Mach-O/) {
5246    # OS X uses otool to examine Mach-O files, rather than objdump.
5247    $obj_tool_map{"otool"} = "otool";
5248    $obj_tool_map{"addr2line"} = "false";  # no addr2line
5249    $obj_tool_map{"objdump"} = "false";  # no objdump
5250  }
5251
5252  # Go fill in %obj_tool_map with the pathnames to use:
5253  foreach my $tool (keys %obj_tool_map) {
5254    $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool});
5255  }
5256}
5257
5258# Returns the path of a caller-specified object tool.  If --tools or
5259# JEPROF_TOOLS are specified, then returns the full path to the tool
5260# with that prefix.  Otherwise, returns the path unmodified (which
5261# means we will look for it on PATH).
5262sub ConfigureTool {
5263  my $tool = shift;
5264  my $path;
5265
5266  # --tools (or $JEPROF_TOOLS) is a comma separated list, where each
5267  # item is either a) a pathname prefix, or b) a map of the form
5268  # <tool>:<path>.  First we look for an entry of type (b) for our
5269  # tool.  If one is found, we use it.  Otherwise, we consider all the
5270  # pathname prefixes in turn, until one yields an existing file.  If
5271  # none does, we use a default path.
5272  my $tools = $main::opt_tools || $ENV{"JEPROF_TOOLS"} || "";
5273  if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) {
5274    $path = $2;
5275    # TODO(csilvers): sanity-check that $path exists?  Hard if it's relative.
5276  } elsif ($tools ne '') {
5277    foreach my $prefix (split(',', $tools)) {
5278      next if ($prefix =~ /:/);    # ignore "tool:fullpath" entries in the list
5279      if (-x $prefix . $tool) {
5280        $path = $prefix . $tool;
5281        last;
5282      }
5283    }
5284    if (!$path) {
5285      error("No '$tool' found with prefix specified by " .
5286            "--tools (or \$JEPROF_TOOLS) '$tools'\n");
5287    }
5288  } else {
5289    # ... otherwise use the version that exists in the same directory as
5290    # jeprof.  If there's nothing there, use $PATH.
5291    $0 =~ m,[^/]*$,;     # this is everything after the last slash
5292    my $dirname = $`;    # this is everything up to and including the last slash
5293    if (-x "$dirname$tool") {
5294      $path = "$dirname$tool";
5295    } else {
5296      $path = $tool;
5297    }
5298  }
5299  if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; }
5300  return $path;
5301}
5302
5303sub ShellEscape {
5304  my @escaped_words = ();
5305  foreach my $word (@_) {
5306    my $escaped_word = $word;
5307    if ($word =~ m![^a-zA-Z0-9/.,_=-]!) {  # check for anything not in whitelist
5308      $escaped_word =~ s/'/'\\''/;
5309      $escaped_word = "'$escaped_word'";
5310    }
5311    push(@escaped_words, $escaped_word);
5312  }
5313  return join(" ", @escaped_words);
5314}
5315
5316sub cleanup {
5317  unlink($main::tmpfile_sym);
5318  unlink(keys %main::tempnames);
5319
5320  # We leave any collected profiles in $HOME/jeprof in case the user wants
5321  # to look at them later.  We print a message informing them of this.
5322  if ((scalar(@main::profile_files) > 0) &&
5323      defined($main::collected_profile)) {
5324    if (scalar(@main::profile_files) == 1) {
5325      print STDERR "Dynamically gathered profile is in $main::collected_profile\n";
5326    }
5327    print STDERR "If you want to investigate this profile further, you can do:\n";
5328    print STDERR "\n";
5329    print STDERR "  jeprof \\\n";
5330    print STDERR "    $main::prog \\\n";
5331    print STDERR "    $main::collected_profile\n";
5332    print STDERR "\n";
5333  }
5334}
5335
5336sub sighandler {
5337  cleanup();
5338  exit(1);
5339}
5340
5341sub error {
5342  my $msg = shift;
5343  print STDERR $msg;
5344  cleanup();
5345  exit(1);
5346}
5347
5348
5349# Run $nm_command and get all the resulting procedure boundaries whose
5350# names match "$regexp" and returns them in a hashtable mapping from
5351# procedure name to a two-element vector of [start address, end address]
5352sub GetProcedureBoundariesViaNm {
5353  my $escaped_nm_command = shift;    # shell-escaped
5354  my $regexp = shift;
5355
5356  my $symbol_table = {};
5357  open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n");
5358  my $last_start = "0";
5359  my $routine = "";
5360  while (<NM>) {
5361    s/\r//g;         # turn windows-looking lines into unix-looking lines
5362    if (m/^\s*([0-9a-f]+) (.) (..*)/) {
5363      my $start_val = $1;
5364      my $type = $2;
5365      my $this_routine = $3;
5366
5367      # It's possible for two symbols to share the same address, if
5368      # one is a zero-length variable (like __start_google_malloc) or
5369      # one symbol is a weak alias to another (like __libc_malloc).
5370      # In such cases, we want to ignore all values except for the
5371      # actual symbol, which in nm-speak has type "T".  The logic
5372      # below does this, though it's a bit tricky: what happens when
5373      # we have a series of lines with the same address, is the first
5374      # one gets queued up to be processed.  However, it won't
5375      # *actually* be processed until later, when we read a line with
5376      # a different address.  That means that as long as we're reading
5377      # lines with the same address, we have a chance to replace that
5378      # item in the queue, which we do whenever we see a 'T' entry --
5379      # that is, a line with type 'T'.  If we never see a 'T' entry,
5380      # we'll just go ahead and process the first entry (which never
5381      # got touched in the queue), and ignore the others.
5382      if ($start_val eq $last_start && $type =~ /t/i) {
5383        # We are the 'T' symbol at this address, replace previous symbol.
5384        $routine = $this_routine;
5385        next;
5386      } elsif ($start_val eq $last_start) {
5387        # We're not the 'T' symbol at this address, so ignore us.
5388        next;
5389      }
5390
5391      if ($this_routine eq $sep_symbol) {
5392        $sep_address = HexExtend($start_val);
5393      }
5394
5395      # Tag this routine with the starting address in case the image
5396      # has multiple occurrences of this routine.  We use a syntax
5397      # that resembles template parameters that are automatically
5398      # stripped out by ShortFunctionName()
5399      $this_routine .= "<$start_val>";
5400
5401      if (defined($routine) && $routine =~ m/$regexp/) {
5402        $symbol_table->{$routine} = [HexExtend($last_start),
5403                                     HexExtend($start_val)];
5404      }
5405      $last_start = $start_val;
5406      $routine = $this_routine;
5407    } elsif (m/^Loaded image name: (.+)/) {
5408      # The win32 nm workalike emits information about the binary it is using.
5409      if ($main::opt_debug) { print STDERR "Using Image $1\n"; }
5410    } elsif (m/^PDB file name: (.+)/) {
5411      # The win32 nm workalike emits information about the pdb it is using.
5412      if ($main::opt_debug) { print STDERR "Using PDB $1\n"; }
5413    }
5414  }
5415  close(NM);
5416  # Handle the last line in the nm output.  Unfortunately, we don't know
5417  # how big this last symbol is, because we don't know how big the file
5418  # is.  For now, we just give it a size of 0.
5419  # TODO(csilvers): do better here.
5420  if (defined($routine) && $routine =~ m/$regexp/) {
5421    $symbol_table->{$routine} = [HexExtend($last_start),
5422                                 HexExtend($last_start)];
5423  }
5424  return $symbol_table;
5425}
5426
5427# Gets the procedure boundaries for all routines in "$image" whose names
5428# match "$regexp" and returns them in a hashtable mapping from procedure
5429# name to a two-element vector of [start address, end address].
5430# Will return an empty map if nm is not installed or not working properly.
5431sub GetProcedureBoundaries {
5432  my $image = shift;
5433  my $regexp = shift;
5434
5435  # If $image doesn't start with /, then put ./ in front of it.  This works
5436  # around an obnoxious bug in our probing of nm -f behavior.
5437  # "nm -f $image" is supposed to fail on GNU nm, but if:
5438  #
5439  # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND
5440  # b. you have a.out in your current directory (a not uncommon occurrence)
5441  #
5442  # then "nm -f $image" succeeds because -f only looks at the first letter of
5443  # the argument, which looks valid because it's [BbSsPp], and then since
5444  # there's no image provided, it looks for a.out and finds it.
5445  #
5446  # This regex makes sure that $image starts with . or /, forcing the -f
5447  # parsing to fail since . and / are not valid formats.
5448  $image =~ s#^[^/]#./$&#;
5449
5450  # For libc libraries, the copy in /usr/lib/debug contains debugging symbols
5451  my $debugging = DebuggingLibrary($image);
5452  if ($debugging) {
5453    $image = $debugging;
5454  }
5455
5456  my $nm = $obj_tool_map{"nm"};
5457  my $cppfilt = $obj_tool_map{"c++filt"};
5458
5459  # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm
5460  # binary doesn't support --demangle.  In addition, for OS X we need
5461  # to use the -f flag to get 'flat' nm output (otherwise we don't sort
5462  # properly and get incorrect results).  Unfortunately, GNU nm uses -f
5463  # in an incompatible way.  So first we test whether our nm supports
5464  # --demangle and -f.
5465  my $demangle_flag = "";
5466  my $cppfilt_flag = "";
5467  my $to_devnull = ">$dev_null 2>&1";
5468  if (system(ShellEscape($nm, "--demangle", $image) . $to_devnull) == 0) {
5469    # In this mode, we do "nm --demangle <foo>"
5470    $demangle_flag = "--demangle";
5471    $cppfilt_flag = "";
5472  } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) {
5473    # In this mode, we do "nm <foo> | c++filt"
5474    $cppfilt_flag = " | " . ShellEscape($cppfilt);
5475  };
5476  my $flatten_flag = "";
5477  if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) {
5478    $flatten_flag = "-f";
5479  }
5480
5481  # Finally, in the case $imagie isn't a debug library, we try again with
5482  # -D to at least get *exported* symbols.  If we can't use --demangle,
5483  # we use c++filt instead, if it exists on this system.
5484  my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag,
5485                                 $image) . " 2>$dev_null $cppfilt_flag",
5486                     ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag,
5487                                 $image) . " 2>$dev_null $cppfilt_flag",
5488                     # 6nm is for Go binaries
5489                     ShellEscape("6nm", "$image") . " 2>$dev_null | sort",
5490                     );
5491
5492  # If the executable is an MS Windows PDB-format executable, we'll
5493  # have set up obj_tool_map("nm_pdb").  In this case, we actually
5494  # want to use both unix nm and windows-specific nm_pdb, since
5495  # PDB-format executables can apparently include dwarf .o files.
5496  if (exists $obj_tool_map{"nm_pdb"}) {
5497    push(@nm_commands,
5498         ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image)
5499         . " 2>$dev_null");
5500  }
5501
5502  foreach my $nm_command (@nm_commands) {
5503    my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp);
5504    return $symbol_table if (%{$symbol_table});
5505  }
5506  my $symbol_table = {};
5507  return $symbol_table;
5508}
5509
5510
5511# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings.
5512# To make them more readable, we add underscores at interesting places.
5513# This routine removes the underscores, producing the canonical representation
5514# used by jeprof to represent addresses, particularly in the tested routines.
5515sub CanonicalHex {
5516  my $arg = shift;
5517  return join '', (split '_',$arg);
5518}
5519
5520
5521# Unit test for AddressAdd:
5522sub AddressAddUnitTest {
5523  my $test_data_8 = shift;
5524  my $test_data_16 = shift;
5525  my $error_count = 0;
5526  my $fail_count = 0;
5527  my $pass_count = 0;
5528  # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n";
5529
5530  # First a few 8-nibble addresses.  Note that this implementation uses
5531  # plain old arithmetic, so a quick sanity check along with verifying what
5532  # happens to overflow (we want it to wrap):
5533  $address_length = 8;
5534  foreach my $row (@{$test_data_8}) {
5535    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5536    my $sum = AddressAdd ($row->[0], $row->[1]);
5537    if ($sum ne $row->[2]) {
5538      printf STDERR "ERROR: %s != %s + %s = %s\n", $sum,
5539             $row->[0], $row->[1], $row->[2];
5540      ++$fail_count;
5541    } else {
5542      ++$pass_count;
5543    }
5544  }
5545  printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n",
5546         $pass_count, $fail_count;
5547  $error_count = $fail_count;
5548  $fail_count = 0;
5549  $pass_count = 0;
5550
5551  # Now 16-nibble addresses.
5552  $address_length = 16;
5553  foreach my $row (@{$test_data_16}) {
5554    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5555    my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1]));
5556    my $expected = join '', (split '_',$row->[2]);
5557    if ($sum ne CanonicalHex($row->[2])) {
5558      printf STDERR "ERROR: %s != %s + %s = %s\n", $sum,
5559             $row->[0], $row->[1], $row->[2];
5560      ++$fail_count;
5561    } else {
5562      ++$pass_count;
5563    }
5564  }
5565  printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n",
5566         $pass_count, $fail_count;
5567  $error_count += $fail_count;
5568
5569  return $error_count;
5570}
5571
5572
5573# Unit test for AddressSub:
5574sub AddressSubUnitTest {
5575  my $test_data_8 = shift;
5576  my $test_data_16 = shift;
5577  my $error_count = 0;
5578  my $fail_count = 0;
5579  my $pass_count = 0;
5580  # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n";
5581
5582  # First a few 8-nibble addresses.  Note that this implementation uses
5583  # plain old arithmetic, so a quick sanity check along with verifying what
5584  # happens to overflow (we want it to wrap):
5585  $address_length = 8;
5586  foreach my $row (@{$test_data_8}) {
5587    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5588    my $sum = AddressSub ($row->[0], $row->[1]);
5589    if ($sum ne $row->[3]) {
5590      printf STDERR "ERROR: %s != %s - %s = %s\n", $sum,
5591             $row->[0], $row->[1], $row->[3];
5592      ++$fail_count;
5593    } else {
5594      ++$pass_count;
5595    }
5596  }
5597  printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n",
5598         $pass_count, $fail_count;
5599  $error_count = $fail_count;
5600  $fail_count = 0;
5601  $pass_count = 0;
5602
5603  # Now 16-nibble addresses.
5604  $address_length = 16;
5605  foreach my $row (@{$test_data_16}) {
5606    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5607    my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1]));
5608    if ($sum ne CanonicalHex($row->[3])) {
5609      printf STDERR "ERROR: %s != %s - %s = %s\n", $sum,
5610             $row->[0], $row->[1], $row->[3];
5611      ++$fail_count;
5612    } else {
5613      ++$pass_count;
5614    }
5615  }
5616  printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n",
5617         $pass_count, $fail_count;
5618  $error_count += $fail_count;
5619
5620  return $error_count;
5621}
5622
5623
5624# Unit test for AddressInc:
5625sub AddressIncUnitTest {
5626  my $test_data_8 = shift;
5627  my $test_data_16 = shift;
5628  my $error_count = 0;
5629  my $fail_count = 0;
5630  my $pass_count = 0;
5631  # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n";
5632
5633  # First a few 8-nibble addresses.  Note that this implementation uses
5634  # plain old arithmetic, so a quick sanity check along with verifying what
5635  # happens to overflow (we want it to wrap):
5636  $address_length = 8;
5637  foreach my $row (@{$test_data_8}) {
5638    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5639    my $sum = AddressInc ($row->[0]);
5640    if ($sum ne $row->[4]) {
5641      printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum,
5642             $row->[0], $row->[4];
5643      ++$fail_count;
5644    } else {
5645      ++$pass_count;
5646    }
5647  }
5648  printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n",
5649         $pass_count, $fail_count;
5650  $error_count = $fail_count;
5651  $fail_count = 0;
5652  $pass_count = 0;
5653
5654  # Now 16-nibble addresses.
5655  $address_length = 16;
5656  foreach my $row (@{$test_data_16}) {
5657    if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; }
5658    my $sum = AddressInc (CanonicalHex($row->[0]));
5659    if ($sum ne CanonicalHex($row->[4])) {
5660      printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum,
5661             $row->[0], $row->[4];
5662      ++$fail_count;
5663    } else {
5664      ++$pass_count;
5665    }
5666  }
5667  printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n",
5668         $pass_count, $fail_count;
5669  $error_count += $fail_count;
5670
5671  return $error_count;
5672}
5673
5674
5675# Driver for unit tests.
5676# Currently just the address add/subtract/increment routines for 64-bit.
5677sub RunUnitTests {
5678  my $error_count = 0;
5679
5680  # This is a list of tuples [a, b, a+b, a-b, a+1]
5681  my $unit_test_data_8 = [
5682    [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)],
5683    [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)],
5684    [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)],
5685    [qw(00000001 ffffffff 00000000 00000002 00000002)],
5686    [qw(00000001 fffffff0 fffffff1 00000011 00000002)],
5687  ];
5688  my $unit_test_data_16 = [
5689    # The implementation handles data in 7-nibble chunks, so those are the
5690    # interesting boundaries.
5691    [qw(aaaaaaaa 50505050
5692        00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)],
5693    [qw(50505050 aaaaaaaa
5694        00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)],
5695    [qw(ffffffff aaaaaaaa
5696        00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)],
5697    [qw(00000001 ffffffff
5698        00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)],
5699    [qw(00000001 fffffff0
5700        00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)],
5701
5702    [qw(00_a00000a_aaaaaaa 50505050
5703        00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)],
5704    [qw(0f_fff0005_0505050 aaaaaaaa
5705        0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)],
5706    [qw(00_000000f_fffffff 01_800000a_aaaaaaa
5707        01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)],
5708    [qw(00_0000000_0000001 ff_fffffff_fffffff
5709        00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)],
5710    [qw(00_0000000_0000001 ff_fffffff_ffffff0
5711        ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)],
5712  ];
5713
5714  $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16);
5715  $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16);
5716  $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16);
5717  if ($error_count > 0) {
5718    print STDERR $error_count, " errors: FAILED\n";
5719  } else {
5720    print STDERR "PASS\n";
5721  }
5722  exit ($error_count);
5723}
5724