#!/usr/bin/env python
#
# This is a tool that works like a debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output and reports it
# in a more human readable way.
#

from __future__ import print_function
import argparse
import os
import sys
from json import loads
from math import ceil
from collections import OrderedDict
from subprocess import Popen, PIPE

# This special value has been used to mark statistics that overflowed.
TAINT_VALUE = "tainted"


def _percentage(part, whole):
    """Return `part` as an integer percentage of `whole`.

    Returns TAINT_VALUE when either operand is tainted, and 0 when `whole`
    is zero (so callers never hit a ZeroDivisionError).
    """
    if part == TAINT_VALUE or whole == TAINT_VALUE:
        return TAINT_VALUE
    if whole == 0:
        return 0
    # ceil() is applied to the numerator only, so int() truncates the
    # quotient; this rounding is kept as-is so that the reported numbers
    # do not change.
    return int(ceil(part * 100.0) / whole)


def init_plot(plt):
    """Set the title, axis labels and tick style on the pyplot module `plt`."""
    plt.title('Debug Location Statistics', fontweight='bold')
    plt.xlabel('location buckets')
    plt.ylabel('number of variables in the location buckets')
    plt.xticks(rotation=45, fontsize='x-small')
    plt.yticks()


def finish_plot(plt):
    """Add the legend and grid, then save the figure as 'locstats.png'."""
    plt.legend()
    plt.grid(color='grey', which='major', axis='y', linestyle='-',
             linewidth=0.3)
    plt.savefig('locstats.png')
    print('The plot was saved within "locstats.png".')


class LocationStats:
    """Holds the debug location statistics collected for one file.

    Any of the numeric attributes may hold TAINT_VALUE instead of a number
    when the corresponding statistic overflowed.
    """

    def __init__(self, file_name, variables_total, variables_total_locstats,
                 variables_with_loc, variables_scope_bytes_covered,
                 variables_scope_bytes, variables_coverage_map):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    def get_pc_coverage(self):
        """Return the PC ranges coverage as an integer percentage.

        Returns TAINT_VALUE if either byte counter is tainted and 0 when
        there are no scope bytes at all.
        """
        return _percentage(self.scope_bytes_covered, self.scope_bytes)

    def pretty_print(self):
        """Print the location buckets as a table on the standard output.

        Returns -1 (after printing a notice) when no scope bytes were
        found, and 0 otherwise.
        """
        if self.scope_bytes == 0:
            print('No scope bytes found.')
            return -1

        pc_ranges_covered = self.get_pc_coverage()
        variables_coverage_per_map = {}
        for cov_bucket in coverage_buckets():
            variables_coverage_per_map[cov_bucket] = _percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats)

        # NOTE(review): the runs of spaces that align these table literals
        # may have been collapsed in the copy this was reviewed from;
        # confirm the column layout against the real output.
        print(' =================================================')
        print(' Debug Location Statistics ')
        print(' =================================================')
        print(' cov% samples percentage(~) ')
        print(' -------------------------------------------------')
        for cov_bucket in coverage_buckets():
            # Tainted entries are strings, so they cannot use the 'd'
            # integer presentation type.
            if self.variables_coverage_map[cov_bucket] == TAINT_VALUE or \
               self.variables_total_locstats == TAINT_VALUE:
                row_format = ' {0:10} {1:8} {2:3}%'
            else:
                row_format = ' {0:10} {1:8d} {2:3d}%'
            print(row_format.format(cov_bucket,
                                    self.variables_coverage_map[cov_bucket],
                                    variables_coverage_per_map[cov_bucket]))
        print(' =================================================')
        print(' -the number of debug variables processed: '
              + str(self.variables_total_locstats))
        print(' -PC ranges covered: ' + str(pc_ranges_covered) + '%')

        # Only if we are processing all the variables output the total
        # availability. The totals are None in the --only-* modes; a count
        # of zero is still a valid value and must be reported.
        if self.variables_total is not None and \
           self.variables_with_loc is not None:
            total_availability = _percentage(self.variables_with_loc,
                                             self.variables_total)
            print(' -------------------------------------------------')
            print(' -total availability: ' + str(total_availability) + '%')
        print(' =================================================')

        return 0

    def draw_plot(self):
        """Draw a bar plot of the location buckets and save it to disk.

        matplotlib is imported lazily, so it is required only when
        plotting.
        """
        from matplotlib import pyplot as plt

        buckets = list(range(len(self.variables_coverage_map)))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        # Materialize the dict views; they are not sequences.
        plt.bar(buckets, list(self.variables_coverage_map.values()),
                align='center',
                tick_label=list(self.variables_coverage_map.keys()),
                label='variables of {}'.format(self.file_name))

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.90, 'PC ranges covered: {}%'.format(pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)

    def draw_location_diff(self, locstats_to_compare):
        """Plot this object's buckets next to those of `locstats_to_compare`.

        Both sets of bars share one set of axes; the bars of the second
        object are red and drawn on the other side of each tick.
        """
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        buckets = list(range(len(self.variables_coverage_map)))
        buckets_to_compare = \
            list(range(len(locstats_to_compare.variables_coverage_map)))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        ax.bar(buckets, list(self.variables_coverage_map.values()),
               align='edge', width=0.4,
               label='variables of {}'.format(self.file_name))
        # A negative width makes the bar extend to the left of the tick.
        ax.bar(buckets_to_compare,
               list(locstats_to_compare.variables_coverage_map.values()),
               color='r', align='edge', width=-0.4,
               label='variables of {}'.format(locstats_to_compare.file_name))
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        plt.text(0.02, 0.88,
                 '{} PC ranges covered: {}%'.format(self.file_name,
                                                    pc_ranges_covered),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)
        plt.text(0.02, 0.83,
                 '{} PC ranges covered: {}%'.format(
                     locstats_to_compare.file_name,
                     pc_ranges_covered_to_compare),
                 transform=plt.gca().transAxes, fontsize=12,
                 verticalalignment='top', bbox=props)

        finish_plot(plt)


def coverage_buckets():
    """Yield the names of the location buckets, from '0%' up to '100%'."""
    yield '0%'
    yield '(0%,10%)'
    for start in range(10, 91, 10):
        yield '[{0}%,{1}%)'.format(start, start + 10)
    yield '100%'


def parse_locstats(opts, binary):
    """Run llvm-dwarfdump --statistics on `binary` and build a LocationStats.

    `opts` supplies the `only_variables`, `only_formal_parameters` and
    `ignore_debug_entry_values` flags, which select the JSON fields to read.
    The llvm-dwarfdump executable is looked up next to this script.

    Prints an error and exits with status 1 when llvm-dwarfdump cannot be
    started, when its output is not a JSON object, or when an expected
    field is missing.
    """
    variables_total = None
    variables_with_loc = None
    variables_coverage_map = OrderedDict()

    # Get the directory of the LLVM tools.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__),
                                      "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen([llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
                        stdin=PIPE, stdout=PIPE, stderr=PIPE,
                        universal_newlines=True)
    except OSError as error:
        print('error: Unable to run "{}": {}'.format(llvm_dwarfdump_cmd,
                                                     error))
        sys.exit(1)
    cmd_stdout, cmd_stderr = subproc.communicate()

    # TODO: Handle errors that are coming from llvm-dwarfdump.

    # Get the JSON and parse it. Decoding errors are ValueErrors; anything
    # else (e.g. KeyboardInterrupt) must not be swallowed here.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        json_parsed = None
    if not isinstance(json_parsed, dict):
        print('error: No valid llvm-dwarfdump statistics found.')
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    def init_field(name):
        # Read one statistic, mapping overflowed counters to TAINT_VALUE.
        if name not in json_parsed:
            print('error: "' + name + '" field not found in the '
                  'llvm-dwarfdump statistics.')
            sys.exit(1)
        if json_parsed[name] == 'overflowed':
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return json_parsed[name]

    # The field names differ only by these two prefixes.
    if opts.only_variables:
        # Read the JSON only for local variables.
        count_prefix, sum_prefix = '#local vars', 'sum_all_local_vars'
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        count_prefix, sum_prefix = '#params', 'sum_all_params'
    else:
        # Read the JSON for both local variables and formal parameters.
        count_prefix, sum_prefix = '#variables', 'sum_all_variables'
        variables_total = init_field('#source variables')
        variables_with_loc = init_field('#source variables with location')

    variables_total_locstats = \
        init_field(count_prefix + ' processed by location statistics')
    variables_scope_bytes_covered = \
        init_field(sum_prefix + '(#bytes in parent scope covered '
                   'by DW_AT_location)')
    variables_scope_bytes = \
        init_field(sum_prefix + '(#bytes in parent scope)')

    if opts.ignore_debug_entry_values:
        variables_scope_bytes_entry_values = \
            init_field(sum_prefix + '(#bytes in parent scope covered '
                       'by DW_OP_entry_value)')
        # Discount the bytes that are covered by entry values; this is
        # possible only when both counters are real numbers.
        if variables_scope_bytes_covered != TAINT_VALUE and \
           variables_scope_bytes_entry_values != TAINT_VALUE:
            variables_scope_bytes_covered = variables_scope_bytes_covered \
                - variables_scope_bytes_entry_values
        bucket_field = count_prefix + ' - entry values with {} of parent ' \
            'scope covered by DW_AT_location'
    else:
        bucket_field = count_prefix + ' with {} of parent scope covered ' \
            'by DW_AT_location'

    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = \
            init_field(bucket_field.format(cov_bucket))

    return LocationStats(binary, variables_total, variables_total_locstats,
                         variables_with_loc, variables_scope_bytes_covered,
                         variables_scope_bytes, variables_coverage_map)


# Parse the program arguments.
def parse_program_args(parser):
    """Register the tool's command line options on `parser` and parse them.

    Adds five boolean switches and the positional `file_names` list (one or
    more entries), then returns the namespace produced by
    `parser.parse_args()`.
    """
    # (option, help text) pairs, in the order they show up in --help.
    switches = (
        ('--only-variables',
         'calculate the location statistics only for local variables'),
        ('--only-formal-parameters',
         'calculate the location statistics only for formal parameters'),
        ('--ignore-debug-entry-values',
         'ignore the location statistics on locations with entry values'),
        ('--draw-plot',
         'show histogram of location buckets generated (requires '
         'matplotlib)'),
        ('--compare',
         'compare the debug location coverage on two files provided, '
         'and draw a plot showing the difference (requires '
         'matplotlib)'),
    )
    for option, description in switches:
        parser.add_argument(option, action='store_true', default=False,
                            help=description)

    parser.add_argument('file_names', nargs='+', type=str,
                        help='file to process')

    return parser.parse_args()


# Verify that the program inputs meet the requirements.
def verify_program_inputs(opts):
    """Check the parsed options; print an error and return False if invalid.

    Returns True when the command line is usable. The checks run in a fixed
    order and stop at the first failure.
    """
    def reject(message):
        # Report the problem and hand back the failure value.
        print(message)
        return False

    if len(sys.argv) < 2:
        return reject('error: Too few arguments.')

    if opts.only_variables and opts.only_formal_parameters:
        return reject('error: Please use just one --only* option.')

    # Comparing needs exactly two files, everything else exactly one.
    file_count = len(opts.file_names)
    if opts.compare:
        if file_count != 2:
            return reject('error: Please specify two files to process.')
    elif file_count != 1:
        return reject('error: Please specify only one file to process.')

    if opts.draw_plot or opts.compare:
        # Imported only to find out whether plotting is possible.
        try:
            import matplotlib  # noqa: F401
        except ImportError:
            return reject('error: matplotlib not found.')

    return True


def Main():
    """Entry point: parse the options, collect the statistics, report them."""
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    locstats = parse_locstats(opts, opts.file_names[0])

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        locstats_to_compare = parse_locstats(opts, opts.file_names[1])
        locstats.draw_location_diff(locstats_to_compare)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output.
    if locstats.pretty_print() == -1:
        sys.exit(0)


if __name__ == '__main__':
    Main()
    sys.exit(0)