#!/usr/bin/env python3
#
# This is a tool that works like debug location coverage calculator.
# It parses the llvm-dwarfdump --statistics output by reporting it
# in a more human readable way.
#

from __future__ import print_function
import argparse
import os
import sys
from json import loads
from math import ceil
from collections import OrderedDict
from subprocess import Popen, PIPE

# This special value has been used to mark statistics that overflowed.
TAINT_VALUE = "tainted"


# Compute the approximate integer percentage of `part` within `whole`.
#
# Returns TAINT_VALUE if either operand is tainted, and 0 if `whole` is
# zero (so that callers never hit a ZeroDivisionError).
def _percentage(part, whole):
    if part == TAINT_VALUE or whole == TAINT_VALUE:
        return TAINT_VALUE
    if whole == 0:
        return 0
    # NOTE: ceil() is applied to the numerator only, so int() truncates
    # the quotient. This is kept as is, because it defines the numbers
    # the tool has always reported.
    return int(ceil(part * 100.0) / whole)


# Initialize the plot: set the title, the axis labels and the tick style.
def init_plot(plt):
    plt.title("Debug Location Statistics", fontweight="bold")
    plt.xlabel("location buckets")
    plt.ylabel("number of variables in the location buckets")
    plt.xticks(rotation=45, fontsize="x-small")
    plt.yticks()


# Finalize the plot: add the legend and the grid, and save the figure
# into "locstats.png" in the current working directory.
def finish_plot(plt):
    plt.legend()
    plt.grid(color="grey", which="major", axis="y", linestyle="-", linewidth=0.3)
    plt.savefig("locstats.png")
    print('The plot was saved within "locstats.png".')


# Holds the debug location statistics.
#
# Every numeric field may hold TAINT_VALUE instead of a number if the
# corresponding llvm-dwarfdump statistic overflowed.
class LocationStats:
    def __init__(
        self,
        file_name,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    ):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        # Maps a coverage bucket name to the number of variables in it.
        self.variables_coverage_map = variables_coverage_map

    # Get the PC ranges coverage, as an integer percentage.
    #
    # Returns TAINT_VALUE if any of the scope bytes statistics is
    # tainted, and 0 if there are no scope bytes at all.
    def get_pc_coverage(self):
        return _percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets.
    #
    # Returns -1 if there are no scope bytes (nothing but a notice is
    # printed in that case), 0 otherwise.
    def pretty_print(self):
        if self.scope_bytes == 0:
            print("No scope bytes found.")
            return -1

        pc_ranges_covered = self.get_pc_coverage()
        # The share of each bucket in the total number of processed
        # variables.
        variables_coverage_per_map = {
            cov_bucket: _percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats,
            )
            for cov_bucket in coverage_buckets()
        }

        print(" =================================================")
        print(" Debug Location Statistics ")
        print(" =================================================")
        print(" cov% samples percentage(~) ")
        print(" -------------------------------------------------")
        for cov_bucket in coverage_buckets():
            # A format spec without a type formats integers exactly like
            # "d", and it also accepts the TAINT_VALUE string, so a single
            # format string serves both the regular and the tainted rows.
            print(
                " {0:10} {1:8} {2:3}%".format(
                    cov_bucket,
                    self.variables_coverage_map[cov_bucket],
                    variables_coverage_per_map[cov_bucket],
                )
            )
        print(" =================================================")
        print(
            " -the number of debug variables processed: "
            + str(self.variables_total_locstats)
        )
        print(" -PC ranges covered: " + str(pc_ranges_covered) + "%")

        # Only if we are processing all the variables output the total
        # availability.
        if self.variables_total and self.variables_with_loc:
            total_availability = _percentage(
                self.variables_with_loc, self.variables_total
            )
            print(" -------------------------------------------------")
            print(" -total availability: " + str(total_availability) + "%")
        print(" =================================================")

        return 0

    # Draw a plot representing the location buckets.
    def draw_plot(self):
        from matplotlib import pyplot as plt

        # Materialize the dictionary views, so that matplotlib gets
        # plain sequences.
        bucket_names = list(self.variables_coverage_map.keys())
        bucket_sizes = list(self.variables_coverage_map.values())

        buckets = range(len(bucket_sizes))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        plt.bar(
            buckets,
            bucket_sizes,
            align="center",
            tick_label=bucket_names,
            label="variables of {}".format(self.file_name),
        )

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.90,
            "PC ranges covered: {}%".format(pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference.
    def draw_location_diff(self, locstats_to_compare):
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        # Materialize the dictionary views, so that matplotlib gets
        # plain sequences.
        bucket_sizes = list(self.variables_coverage_map.values())
        bucket_sizes_to_compare = list(
            locstats_to_compare.variables_coverage_map.values()
        )
        buckets = range(len(bucket_sizes))
        buckets_to_compare = range(len(bucket_sizes_to_compare))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # The two bar series share the tick positions: a positive width
        # is used for this object and a negative one for the other.
        ax.bar(
            buckets,
            bucket_sizes,
            align="edge",
            width=0.4,
            label="variables of {}".format(self.file_name),
        )
        ax.bar(
            buckets_to_compare,
            bucket_sizes_to_compare,
            color="r",
            align="edge",
            width=-0.4,
            label="variables of {}".format(locstats_to_compare.file_name),
        )
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.88,
            "{} PC ranges covered: {}%".format(self.file_name, pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )
        plt.text(
            0.02,
            0.83,
            "{} PC ranges covered: {}%".format(
                locstats_to_compare.file_name, pc_ranges_covered_to_compare
            ),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)


# Define the location buckets: "0%", "(0%,10%)", "[10%,20%)", ...,
# "[90%,100%)" and "100%".
def coverage_buckets():
    yield "0%"
    yield "(0%,10%)"
    for start in range(10, 91, 10):
        yield "[{0}%,{1}%)".format(start, start + 10)
    yield "100%"


# Parse the JSON representing the debug statistics, and create a
# LocationStats object.
#
# `opts` provides the only_variables, only_formal_parameters and
# ignore_debug_entry_values switches; `binary` is the file handed over
# to llvm-dwarfdump. The process is terminated with the exit code 1 if
# llvm-dwarfdump can not be run, or if its output does not contain the
# expected statistics.
def parse_locstats(opts, binary):
    # These stay None unless all the variables are being processed.
    variables_total = None
    variables_with_loc = None
    variables_coverage_map = OrderedDict()

    # Get the directory of the LLVM tools.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen(
            [llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        print("error: Unable to run llvm-dwarfdump: {}".format(err))
        sys.exit(1)
    cmd_stdout, _ = subproc.communicate()

    # TODO: Handle errors that are coming from llvm-dwarfdump.

    # Get the JSON and parse it. The JSON decoding errors are a kind of
    # ValueError; anything but a JSON object is of no use either.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        json_parsed = None
    if not isinstance(json_parsed, dict):
        print("error: No valid llvm-dwarfdump statistics found.")
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    # Read a single statistic, mapping the overflowed ones to TAINT_VALUE.
    def init_field(name):
        if name not in json_parsed:
            print(
                'error: "' + name + '" field not found in the llvm-dwarfdump'
                " statistics."
            )
            sys.exit(1)
        if json_parsed[name] == "overflowed":
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return json_parsed[name]

    # The statistics names for local variables, formal parameters and all
    # the variables differ in these two prefixes only.
    if opts.only_variables:
        # Read the JSON only for local variables.
        count_prefix = "#local vars"
        sum_prefix = "sum_all_local_vars"
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        count_prefix = "#params"
        sum_prefix = "sum_all_params"
    else:
        # Read the JSON for both local variables and formal parameters.
        count_prefix = "#variables"
        sum_prefix = "sum_all_variables"
        variables_total = init_field("#source variables")
        variables_with_loc = init_field("#source variables with location")

    variables_total_locstats = init_field(
        count_prefix + " processed by location statistics"
    )
    variables_scope_bytes_covered = init_field(
        sum_prefix + "(#bytes in parent scope covered by DW_AT_location)"
    )
    variables_scope_bytes = init_field(sum_prefix + "(#bytes in parent scope)")

    if opts.ignore_debug_entry_values:
        # Do not count the bytes covered by the entry values only.
        variables_scope_bytes_entry_values = init_field(
            sum_prefix + "(#bytes in parent scope covered by DW_OP_entry_value)"
        )
        if (
            variables_scope_bytes_covered != TAINT_VALUE
            and variables_scope_bytes_entry_values != TAINT_VALUE
        ):
            variables_scope_bytes_covered = (
                variables_scope_bytes_covered - variables_scope_bytes_entry_values
            )
        cov_category = (
            count_prefix
            + " - entry values with {} of parent scope covered by DW_AT_location"
        )
    else:
        cov_category = (
            count_prefix + " with {} of parent scope covered by DW_AT_location"
        )

    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = init_field(
            cov_category.format(cov_bucket)
        )

    return LocationStats(
        binary,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    )


# Parse the program arguments.
def parse_program_args(parser):
    # All the options are plain switches that are off unless given, so
    # they are registered from a (flag, help text) table, in the order
    # they are listed in the help output.
    switches = (
        (
            "--only-variables",
            "calculate the location statistics only for local variables",
        ),
        (
            "--only-formal-parameters",
            "calculate the location statistics only for formal parameters",
        ),
        (
            "--ignore-debug-entry-values",
            "ignore the location statistics on locations with entry values",
        ),
        (
            "--draw-plot",
            "show histogram of location buckets generated (requires matplotlib)",
        ),
        (
            "--compare",
            "compare the debug location coverage on two files provided, "
            "and draw a plot showing the difference (requires "
            "matplotlib)",
        ),
    )
    for flag, description in switches:
        parser.add_argument(
            flag, action="store_true", default=False, help=description
        )

    # One or more files; how many are allowed is checked after the parsing.
    parser.add_argument("file_names", nargs="+", type=str, help="file to process")

    # The arguments are taken from sys.argv.
    return parser.parse_args()


# Verify that the program inputs meet the requirements.
def verify_program_inputs(opts):
    if len(sys.argv) < 2:
        print("error: Too few arguments.")
        return False

    if opts.only_variables and opts.only_formal_parameters:
        print("error: Please use just one --only* option.")
        return False

    # The comparison needs exactly two files, everything else exactly one.
    file_count = len(opts.file_names)
    if opts.compare:
        if file_count != 2:
            print("error: Please specify two files to process.")
            return False
    elif file_count != 1:
        print("error: Please specify only one file to process.")
        return False

    # Both the plotting modes depend on matplotlib being importable.
    needs_matplotlib = opts.draw_plot or opts.compare
    if needs_matplotlib:
        try:
            import matplotlib
        except ImportError:
            print("error: matplotlib not found.")
            return False

    return True


# The tool entry point: parse and verify the arguments, collect the
# statistics, and then either compare two files on a plot, draw a plot for
# a single file, or pretty print the statistics.
def Main():
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not verify_program_inputs(opts):
        parser.print_help()
        sys.exit(1)

    locstats = parse_locstats(opts, opts.file_names[0])

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        locstats_to_compare = parse_locstats(opts, opts.file_names[1])
        locstats.draw_location_diff(locstats_to_compare)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output.
    if locstats.pretty_print() == -1:
        sys.exit(0)


if __name__ == "__main__":
    Main()
    sys.exit(0)