xref: /openbsd-src/gnu/llvm/llvm/utils/llvm-locstats/llvm-locstats.py (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
1#!/usr/bin/env python
2#
# This tool is a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human-readable way.
6#
7
8from __future__ import print_function
9import argparse
10import os
11import sys
12from json import loads
13from math import ceil
14from collections import OrderedDict
15from subprocess import Popen, PIPE
16
# This special value has been used to mark statistics that overflowed.
# It replaces any field that llvm-dwarfdump reports as 'overflowed'; code
# that derives percentages checks for it and propagates it instead of doing
# arithmetic on it.
TAINT_VALUE = "tainted"
19
# Initialize the plot.
def init_plot(plt):
  """Apply the title, axis labels and tick styling shared by every plot.

  `plt` is the plotting interface to configure; the callers in this file
  pass matplotlib.pyplot.
  """
  title_style = dict(fontweight='bold')
  tick_style = dict(rotation=45, fontsize='x-small')

  plt.title('Debug Location Statistics', **title_style)
  plt.xlabel('location buckets')
  plt.ylabel('number of variables in the location buckets')
  plt.xticks(**tick_style)
  plt.yticks()
27
# Finalize the plot.
def finish_plot(plt):
  """Add the legend and horizontal grid lines, then save the figure.

  The image is always written to 'locstats.png' and a confirmation line is
  printed afterwards.
  """
  grid_style = dict(color='grey', which='major', axis='y', linestyle='-',
                    linewidth=0.3)

  plt.legend()
  plt.grid(**grid_style)
  plt.savefig('locstats.png')
  print('The plot was saved within "locstats.png".')
34
# Holds the debug location statistics.
class LocationStats:
  """Debug location statistics gathered for a single file.

  Any numeric field may hold TAINT_VALUE instead of a number, marking a
  statistic that overflowed; percentages derived from a tainted field are
  reported as TAINT_VALUE too.

  Attributes:
    file_name: name of the file the statistics belong to.
    variables_total: number of source variables, or None when the totals
      were not collected.
    variables_total_locstats: number of variables processed by the location
      statistics.
    variables_with_loc: number of source variables with a location, or None
      when the totals were not collected.
    scope_bytes_covered: bytes of parent scope covered by a location.
    scope_bytes: bytes of parent scope.
    variables_coverage_map: ordered mapping from a coverage bucket label to
      the number of variables that fall into it.
  """

  def __init__(self, file_name, variables_total, variables_total_locstats,
    variables_with_loc, variables_scope_bytes_covered, variables_scope_bytes,
    variables_coverage_map):
    self.file_name = file_name
    self.variables_total = variables_total
    self.variables_total_locstats = variables_total_locstats
    self.variables_with_loc = variables_with_loc
    self.scope_bytes_covered = variables_scope_bytes_covered
    self.scope_bytes = variables_scope_bytes
    self.variables_coverage_map = variables_coverage_map

  # Express one statistic as a percentage of another.
  @staticmethod
  def _percentage(part, whole):
    """Return `part` as an integer percentage of `whole`.

    Returns TAINT_VALUE when either operand is tainted, and 0 when `whole`
    is 0, so that callers never divide by zero.
    """
    if part == TAINT_VALUE or whole == TAINT_VALUE:
      return TAINT_VALUE
    if whole == 0:
      return 0
    # ceil() only rounds the scaled numerator; int() then truncates the
    # quotient, so the result is effectively rounded down.  Kept as is so
    # the reported numbers do not change.
    return int(ceil(part * 100.0) / whole)

  # Get the PC ranges coverage.
  def get_pc_coverage(self):
    """Return the percentage of scope bytes covered by a location.

    The result is an int, or TAINT_VALUE when either byte count overflowed.
    A file with no scope bytes yields 0 rather than raising
    ZeroDivisionError, which keeps the plotting paths usable for it.
    """
    return self._percentage(self.scope_bytes_covered, self.scope_bytes)

  # Pretty print the debug location buckets.
  def pretty_print(self):
    """Print the statistics as a table on the standard output.

    Returns:
      -1 when there are no scope bytes (only a notice is printed),
      0 otherwise.
    """
    if self.scope_bytes == 0:
      print('No scope bytes found.')
      return -1

    pc_ranges_covered = self.get_pc_coverage()

    # Work out every percentage before printing anything.
    variables_coverage_per_map = {}
    for cov_bucket in coverage_buckets():
      variables_coverage_per_map[cov_bucket] = self._percentage(
        self.variables_coverage_map[cov_bucket],
        self.variables_total_locstats)

    print(' =================================================')
    print('            Debug Location Statistics       ')
    print(' =================================================')
    print('     cov%           samples         percentage(~)  ')
    print(' -------------------------------------------------')
    for cov_bucket in coverage_buckets():
      # Untyped format fields right-align integers exactly like the 'd'
      # presentation type does and also accept the TAINT_VALUE string, so a
      # single format serves both tainted and regular rows.
      print('   {0:10}     {1:8}              {2:3}%'.format(
        cov_bucket, self.variables_coverage_map[cov_bucket],
        variables_coverage_per_map[cov_bucket]))
    print(' =================================================')
    print(' -the number of debug variables processed: '
          + str(self.variables_total_locstats))
    print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

    # Only if we are processing all the variables output the total
    # availability.  NOTE: the truthiness test also skips this section when
    # either count is 0, not just when it is None.
    if self.variables_total and self.variables_with_loc:
      total_availability = self._percentage(self.variables_with_loc,
                                            self.variables_total)
      print(' -------------------------------------------------')
      print(' -total availability: ' + str(total_availability) + '%')
    print(' =================================================')

    return 0

  # Draw a plot representing the location buckets.
  def draw_plot(self):
    """Draw a bar chart of the location buckets and save it.

    Requires matplotlib.  The figure is finalized and written out by
    finish_plot().
    """
    from matplotlib import pyplot as plt

    # Hand matplotlib real lists: on Python 3 keys()/values() are views.
    bucket_labels = list(self.variables_coverage_map.keys())
    bucket_counts = list(self.variables_coverage_map.values())
    buckets = range(len(bucket_counts))

    plt.figure(figsize=(12, 8))
    init_plot(plt)
    plt.bar(buckets, bucket_counts, align='center',
            tick_label=bucket_labels,
            label='variables of {}'.format(self.file_name))

    # Place the text box with the coverage info.
    pc_ranges_covered = self.get_pc_coverage()
    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)

    finish_plot(plt)

  # Compare the two LocationStats objects and draw a plot showing
  # the difference.
  def draw_location_diff(self, locstats_to_compare):
    """Draw both objects' buckets side by side in one bar chart and save it.

    Args:
      locstats_to_compare: the LocationStats object drawn (in red) next to
        this one.

    Requires matplotlib.  The figure is finalized and written out by
    finish_plot().
    """
    from matplotlib import pyplot as plt

    pc_ranges_covered = self.get_pc_coverage()
    pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

    # Hand matplotlib real lists: on Python 3 values() is a view.
    bucket_counts = list(self.variables_coverage_map.values())
    bucket_counts_to_compare = \
      list(locstats_to_compare.variables_coverage_map.values())
    buckets = range(len(bucket_counts))
    buckets_to_compare = range(len(bucket_counts_to_compare))

    fig = plt.figure(figsize=(12, 8))
    ax = fig.add_subplot(111)
    init_plot(plt)

    comparison_keys = list(coverage_buckets())
    # Opposite-signed widths with align='edge' put the two bars of a bucket
    # on either side of the same tick.
    ax.bar(buckets, bucket_counts, align='edge',
           width=0.4,
           label='variables of {}'.format(self.file_name))
    ax.bar(buckets_to_compare, bucket_counts_to_compare,
           color='r', align='edge', width=-0.4,
           label='variables of {}'.format(locstats_to_compare.file_name))
    ax.set_xticks(range(len(comparison_keys)))
    ax.set_xticklabels(comparison_keys)

    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    plt.text(0.02, 0.88,
             '{} PC ranges covered: {}%'.format(self.file_name,
                                                pc_ranges_covered),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)
    plt.text(0.02, 0.83,
             '{} PC ranges covered: {}%'.format(
               locstats_to_compare.file_name, pc_ranges_covered_to_compare),
             transform=plt.gca().transAxes, fontsize=12,
             verticalalignment='top', bbox=props)

    finish_plot(plt)
171
# Define the location buckets.
def coverage_buckets():
  """Yield the coverage bucket labels in ascending order.

  The sequence is '0%', '(0%,10%)', the nine 10%-wide half-open ranges from
  '[10%,20%)' to '[90%,100%)', and finally '100%'.
  """
  yield '0%'
  yield '(0%,10%)'
  lower = 10
  while lower <= 90:
    yield '[{0}%,{1}%)'.format(lower, lower + 10)
    lower += 10
  yield '100%'
179
# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
def parse_locstats(opts, binary):
  """Run llvm-dwarfdump --statistics on `binary` and build a LocationStats.

  Args:
    opts: parsed program options; only_variables, only_formal_parameters and
      ignore_debug_entry_values select which statistics are read.
    binary: path of the file handed to llvm-dwarfdump.

  Returns:
    A LocationStats object.  Fields that llvm-dwarfdump reported as
    'overflowed' hold TAINT_VALUE (a warning is printed for each of them).

  Exits with status 1, after printing an error, when llvm-dwarfdump cannot
  be started, when its output is not a JSON object, or when an expected
  statistic is missing from it.
  """
  # The three modes read the same statistics; their JSON keys differ only
  # in the noun naming what was counted and in the tag used inside the
  # sum_all_*() keys.
  if opts.only_variables:
    # Only local variables.
    counted, sum_tag = 'local vars', 'local_vars'
  elif opts.only_formal_parameters:
    # Only formal parameters.
    counted, sum_tag = 'params', 'params'
  else:
    # Both local variables and formal parameters.
    counted, sum_tag = 'variables', 'variables'
  process_everything = \
    not (opts.only_variables or opts.only_formal_parameters)

  # llvm-dwarfdump is looked up in the directory of this script.
  llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                    "llvm-dwarfdump")
  # The statistics llvm-dwarfdump option.
  llvm_dwarfdump_stats_opt = "--statistics"

  # Generate the stats with the llvm-dwarfdump.
  try:
    subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                    stdin=PIPE, stdout=PIPE, stderr=PIPE,
                    universal_newlines=True)
  except OSError as err:
    # E.g. the tool is missing or is not executable.
    print('error: Unable to run "' + llvm_dwarfdump_cmd + '": ' + str(err))
    sys.exit(1)
  cmd_stdout, cmd_stderr = subproc.communicate()

  # Get the JSON and parse it.  Only a decoding failure is handled here, so
  # KeyboardInterrupt and SystemExit are no longer swallowed.
  try:
    json_parsed = loads(cmd_stdout)
  except ValueError:
    json_parsed = None

  if not isinstance(json_parsed, dict):
    print('error: No valid llvm-dwarfdump statistics found.')
    if cmd_stderr:
      # Show what llvm-dwarfdump itself complained about.
      sys.stderr.write(cmd_stderr)
    sys.exit(1)

  # TODO: Parse the statistics Version from JSON.

  # Read one statistic, mapping an overflowed value to TAINT_VALUE.
  def init_field(name):
    if name not in json_parsed:
      print('error: "' + name + '" field not found in the llvm-dwarfdump '
            'statistics.')
      sys.exit(1)
    if json_parsed[name] == 'overflowed':
      print('warning: "' + name + '" field overflowed.')
      return TAINT_VALUE
    return json_parsed[name]

  # The totals are only read when all the variables are processed.
  variables_total = None
  variables_with_loc = None
  if process_everything:
    variables_total = init_field('#source variables')
    variables_with_loc = init_field('#source variables with location')

  variables_total_locstats = \
    init_field('#{} processed by location statistics'.format(counted))

  sum_prefix = 'sum_all_{}(#bytes in parent scope'.format(sum_tag)
  variables_scope_bytes_covered = \
    init_field(sum_prefix + ' covered by DW_AT_location)')
  variables_scope_bytes = init_field(sum_prefix + ')')

  bucket_subject = counted
  if opts.ignore_debug_entry_values:
    # Leave out the bytes covered only through entry values and switch to
    # the buckets computed without them.
    variables_scope_bytes_entry_values = \
      init_field(sum_prefix + ' covered by DW_OP_entry_value)')
    if variables_scope_bytes_covered != TAINT_VALUE and \
       variables_scope_bytes_entry_values != TAINT_VALUE:
      variables_scope_bytes_covered = variables_scope_bytes_covered \
        - variables_scope_bytes_entry_values
    bucket_subject = counted + ' - entry values'

  variables_coverage_map = OrderedDict()
  for cov_bucket in coverage_buckets():
    cov_category = \
      '#{0} with {1} of parent scope covered by DW_AT_location'.format(
        bucket_subject, cov_bucket)
    variables_coverage_map[cov_bucket] = init_field(cov_category)

  return LocationStats(binary, variables_total, variables_total_locstats,
                       variables_with_loc, variables_scope_bytes_covered,
                       variables_scope_bytes, variables_coverage_map)
312
# Parse the program arguments.
def parse_program_args(parser):
  """Register the tool's options on `parser` and parse the command line.

  Every option is a boolean switch that defaults to False; the positional
  `file_names` argument takes one or more files.

  Returns:
    The namespace produced by parser.parse_args().
  """
  switches = (
    ('--only-variables',
     'calculate the location statistics only for local variables'),
    ('--only-formal-parameters',
     'calculate the location statistics only for formal parameters'),
    ('--ignore-debug-entry-values',
     'ignore the location statistics on locations with '
     'entry values'),
    ('--draw-plot',
     'show histogram of location buckets generated (requires '
     'matplotlib)'),
    ('--compare',
     'compare the debug location coverage on two files provided, '
     'and draw a plot showing the difference  (requires '
     'matplotlib)'),
  )
  for flag, description in switches:
    parser.add_argument(flag, action='store_true', default=False,
                        help=description)
  parser.add_argument('file_names', nargs='+', type=str, help='file to process')

  return parser.parse_args()
334
# Verify that the program inputs meet the requirements.
def verify_program_inputs(opts):
  """Check the parsed options for consistency.

  The checks run in order and only the first failing one is reported on the
  standard output.

  Returns:
    True when the inputs are usable, False otherwise.
  """
  complaint = None

  if len(sys.argv) < 2:
    complaint = 'error: Too few arguments.'
  elif opts.only_variables and opts.only_formal_parameters:
    complaint = 'error: Please use just one --only* option.'
  elif not opts.compare and len(opts.file_names) != 1:
    complaint = 'error: Please specify only one file to process.'
  elif opts.compare and len(opts.file_names) != 2:
    complaint = 'error: Please specify two files to process.'
  elif opts.draw_plot or opts.compare:
    # Both plotting modes need matplotlib; probe for it up front.
    try:
      import matplotlib
    except ImportError:
      complaint = 'error: matplotlib not found.'

  if complaint is None:
    return True

  print(complaint)
  return False
361
def Main():
  """Parse the command line and run the requested mode.

  With --compare the statistics of two files are plotted side by side; with
  --draw-plot a histogram of a single file is drawn; otherwise the
  statistics are printed as a table.  Invalid inputs print the help text
  and exit with status 1.
  """
  parser = argparse.ArgumentParser()
  opts = parse_program_args(parser)

  if not verify_program_inputs(opts):
    parser.print_help()
    sys.exit(1)

  locstats = parse_locstats(opts, opts.file_names[0])

  if opts.compare:
    # Draw a plot showing the difference in debug location coverage between
    # two files.
    locstats_to_compare = parse_locstats(opts, opts.file_names[1])
    locstats.draw_location_diff(locstats_to_compare)
    return

  if opts.draw_plot:
    # Draw a histogram representing the location buckets.
    locstats.draw_plot()
    return

  # Pretty print collected info on the standard output.
  if locstats.pretty_print() == -1:
    sys.exit(0)
387
# Run the tool only when this file is executed as a script, and exit with
# status 0 once Main() returns.
if __name__ == '__main__':
  Main()
  sys.exit(0)
391