1#!/usr/bin/env python 2 3""" 4CmpRuns - A simple tool for comparing two static analyzer runs to determine 5which reports have been added, removed, or changed. 6 7This is designed to support automated testing using the static analyzer, from 8two perspectives: 9 1. To monitor changes in the static analyzer's reports on real code bases, 10 for regression testing. 11 12 2. For use by end users who want to integrate regular static analyzer testing 13 into a buildbot like environment. 14 15Usage: 16 17 # Load the results of both runs, to obtain lists of the corresponding 18 # AnalysisDiagnostic objects. 19 # 20 resultsA = load_results_from_single_run(singleRunInfoA, delete_empty) 21 resultsB = load_results_from_single_run(singleRunInfoB, delete_empty) 22 23 # Generate a relation from diagnostics in run A to diagnostics in run B 24 # to obtain a list of triples (a, b, confidence). 25 diff = compare_results(resultsA, resultsB) 26 27""" 28import json 29import os 30import plistlib 31import re 32import sys 33 34from math import log 35from collections import defaultdict 36from copy import copy 37from enum import Enum 38from typing import ( 39 Any, 40 DefaultDict, 41 Dict, 42 List, 43 NamedTuple, 44 Optional, 45 Sequence, 46 Set, 47 TextIO, 48 TypeVar, 49 Tuple, 50 Union, 51) 52 53 54Number = Union[int, float] 55Stats = Dict[str, Dict[str, Number]] 56Plist = Dict[str, Any] 57JSON = Dict[str, Any] 58# Diff in a form: field -> (before, after) 59JSONDiff = Dict[str, Tuple[str, str]] 60# Type for generics 61T = TypeVar("T") 62 63STATS_REGEXP = re.compile(r"Statistics: (\{.+\})", re.MULTILINE | re.DOTALL) 64 65 66class Colors: 67 """ 68 Color for terminal highlight. 69 """ 70 71 RED = "\x1b[2;30;41m" 72 GREEN = "\x1b[6;30;42m" 73 CLEAR = "\x1b[0m" 74 75 76class HistogramType(str, Enum): 77 RELATIVE = "relative" 78 LOG_RELATIVE = "log-relative" 79 ABSOLUTE = "absolute" 80 81 82class ResultsDirectory(NamedTuple): 83 path: str 84 root: str = "" 85 86 87class SingleRunInfo: 88 """ 89 Information about analysis run: 90 path - the analysis output directory 91 root - the name of the root directory, which will be disregarded when 92 determining the source file name 93 """ 94 95 def __init__(self, results: ResultsDirectory, verbose_log: Optional[str] = None): 96 self.path = results.path 97 self.root = results.root.rstrip("/\\") 98 self.verbose_log = verbose_log 99 100 101class AnalysisDiagnostic: 102 def __init__( 103 self, data: Plist, report: "AnalysisReport", html_report: Optional[str] 104 ): 105 self._data = data 106 self._loc = self._data["location"] 107 self._report = report 108 self._html_report = html_report 109 self._report_size = len(self._data["path"]) 110 111 def get_file_name(self) -> str: 112 root = self._report.run.root 113 file_name = self._report.files[self._loc["file"]] 114 115 if file_name.startswith(root) and len(root) > 0: 116 return file_name[len(root) + 1 :] 117 118 return file_name 119 120 def get_root_file_name(self) -> str: 121 path = self._data["path"] 122 123 if not path: 124 return self.get_file_name() 125 126 p = path[0] 127 if "location" in p: 128 file_index = p["location"]["file"] 129 else: # control edge 130 file_index = path[0]["edges"][0]["start"][0]["file"] 131 132 out = self._report.files[file_index] 133 root = self._report.run.root 134 135 if out.startswith(root): 136 return out[len(root) :] 137 138 return out 139 140 def get_line(self) -> int: 141 return self._loc["line"] 142 143 def get_column(self) -> int: 144 return self._loc["col"] 145 146 def get_path_length(self) -> int: 147 return self._report_size 148 149 def get_category(self) -> str: 150 return self._data["category"] 151 152 def get_description(self) -> str: 153 return self._data["description"] 154 155 def get_location(self) -> str: 156 return f"{self.get_file_name()}:{self.get_line()}:{self.get_column()}" 157 158 def get_issue_identifier(self) -> str: 159 id = self.get_file_name() + "+" 160 161 if "issue_context" in self._data: 162 id += self._data["issue_context"] + "+" 163 164 if "issue_hash_content_of_line_in_context" in self._data: 165 id += str(self._data["issue_hash_content_of_line_in_context"]) 166 167 return id 168 169 def get_html_report(self) -> str: 170 if self._html_report is None: 171 return " " 172 173 return os.path.join(self._report.run.path, self._html_report) 174 175 def get_readable_name(self) -> str: 176 if "issue_context" in self._data: 177 funcname_postfix = "#" + self._data["issue_context"] 178 else: 179 funcname_postfix = "" 180 181 root_filename = self.get_root_file_name() 182 file_name = self.get_file_name() 183 184 if root_filename != file_name: 185 file_prefix = f"[{root_filename}] {file_name}" 186 else: 187 file_prefix = root_filename 188 189 line = self.get_line() 190 col = self.get_column() 191 return ( 192 f"{file_prefix}{funcname_postfix}:{line}:{col}" 193 f", {self.get_category()}: {self.get_description()}" 194 ) 195 196 KEY_FIELDS = ["check_name", "category", "description"] 197 198 def is_similar_to(self, other: "AnalysisDiagnostic") -> bool: 199 # We consider two diagnostics similar only if at least one 200 # of the key fields is the same in both diagnostics. 201 return len(self.get_diffs(other)) != len(self.KEY_FIELDS) 202 203 def get_diffs(self, other: "AnalysisDiagnostic") -> JSONDiff: 204 return { 205 field: (self._data[field], other._data[field]) 206 for field in self.KEY_FIELDS 207 if self._data[field] != other._data[field] 208 } 209 210 # Note, the data format is not an API and may change from one analyzer 211 # version to another. 212 def get_raw_data(self) -> Plist: 213 return self._data 214 215 def __eq__(self, other: object) -> bool: 216 return hash(self) == hash(other) 217 218 def __ne__(self, other: object) -> bool: 219 return hash(self) != hash(other) 220 221 def __hash__(self) -> int: 222 return hash(self.get_issue_identifier()) 223 224 225class AnalysisRun: 226 def __init__(self, info: SingleRunInfo): 227 self.path = info.path 228 self.root = info.root 229 self.info = info 230 self.reports: List[AnalysisReport] = [] 231 # Cumulative list of all diagnostics from all the reports. 232 self.diagnostics: List[AnalysisDiagnostic] = [] 233 self.clang_version: Optional[str] = None 234 self.raw_stats: List[JSON] = [] 235 236 def get_clang_version(self) -> Optional[str]: 237 return self.clang_version 238 239 def read_single_file(self, path: str, delete_empty: bool): 240 with open(path, "rb") as plist_file: 241 data = plistlib.load(plist_file) 242 243 if "statistics" in data: 244 self.raw_stats.append(json.loads(data["statistics"])) 245 data.pop("statistics") 246 247 # We want to retrieve the clang version even if there are no 248 # reports. Assume that all reports were created using the same 249 # clang version (this is always true and is more efficient). 250 if "clang_version" in data: 251 if self.clang_version is None: 252 self.clang_version = data.pop("clang_version") 253 else: 254 data.pop("clang_version") 255 256 # Ignore/delete empty reports. 257 if not data["files"]: 258 if delete_empty: 259 os.remove(path) 260 return 261 262 # Extract the HTML reports, if they exists. 263 htmlFiles = [] 264 for d in data["diagnostics"]: 265 if "HTMLDiagnostics_files" in d: 266 # FIXME: Why is this named files, when does it have multiple 267 # files? 268 assert len(d["HTMLDiagnostics_files"]) == 1 269 htmlFiles.append(d.pop("HTMLDiagnostics_files")[0]) 270 else: 271 htmlFiles.append(None) 272 273 report = AnalysisReport(self, data.pop("files")) 274 # Python 3.10 offers zip(..., strict=True). The following assertion 275 # mimics it. 276 assert len(data["diagnostics"]) == len(htmlFiles) 277 diagnostics = [ 278 AnalysisDiagnostic(d, report, h) 279 for d, h in zip(data.pop("diagnostics"), htmlFiles) 280 ] 281 282 assert not data 283 284 report.diagnostics.extend(diagnostics) 285 self.reports.append(report) 286 self.diagnostics.extend(diagnostics) 287 288 289class AnalysisReport: 290 def __init__(self, run: AnalysisRun, files: List[str]): 291 self.run = run 292 self.files = files 293 self.diagnostics: List[AnalysisDiagnostic] = [] 294 295 296def load_results( 297 results: ResultsDirectory, 298 delete_empty: bool = True, 299 verbose_log: Optional[str] = None, 300) -> AnalysisRun: 301 """ 302 Backwards compatibility API. 303 """ 304 return load_results_from_single_run( 305 SingleRunInfo(results, verbose_log), delete_empty 306 ) 307 308 309def load_results_from_single_run( 310 info: SingleRunInfo, delete_empty: bool = True 311) -> AnalysisRun: 312 """ 313 # Load results of the analyzes from a given output folder. 314 # - info is the SingleRunInfo object 315 # - delete_empty specifies if the empty plist files should be deleted 316 317 """ 318 path = info.path 319 run = AnalysisRun(info) 320 321 if os.path.isfile(path): 322 run.read_single_file(path, delete_empty) 323 else: 324 for dirpath, dirnames, filenames in os.walk(path): 325 for f in filenames: 326 if not f.endswith("plist"): 327 continue 328 329 p = os.path.join(dirpath, f) 330 run.read_single_file(p, delete_empty) 331 332 return run 333 334 335def cmp_analysis_diagnostic(d): 336 return d.get_issue_identifier() 337 338 339AnalysisDiagnosticPair = Tuple[AnalysisDiagnostic, AnalysisDiagnostic] 340 341 342class ComparisonResult: 343 def __init__(self): 344 self.present_in_both: List[AnalysisDiagnostic] = [] 345 self.present_only_in_old: List[AnalysisDiagnostic] = [] 346 self.present_only_in_new: List[AnalysisDiagnostic] = [] 347 self.changed_between_new_and_old: List[AnalysisDiagnosticPair] = [] 348 349 def add_common(self, issue: AnalysisDiagnostic): 350 self.present_in_both.append(issue) 351 352 def add_removed(self, issue: AnalysisDiagnostic): 353 self.present_only_in_old.append(issue) 354 355 def add_added(self, issue: AnalysisDiagnostic): 356 self.present_only_in_new.append(issue) 357 358 def add_changed(self, old_issue: AnalysisDiagnostic, new_issue: AnalysisDiagnostic): 359 self.changed_between_new_and_old.append((old_issue, new_issue)) 360 361 362GroupedDiagnostics = DefaultDict[str, List[AnalysisDiagnostic]] 363 364 365def get_grouped_diagnostics( 366 diagnostics: List[AnalysisDiagnostic], 367) -> GroupedDiagnostics: 368 result: GroupedDiagnostics = defaultdict(list) 369 for diagnostic in diagnostics: 370 result[diagnostic.get_location()].append(diagnostic) 371 return result 372 373 374def compare_results( 375 results_old: AnalysisRun, 376 results_new: AnalysisRun, 377 histogram: Optional[HistogramType] = None, 378) -> ComparisonResult: 379 """ 380 compare_results - Generate a relation from diagnostics in run A to 381 diagnostics in run B. 382 383 The result is the relation as a list of triples (a, b) where 384 each element {a,b} is None or a matching element from the respective run 385 """ 386 387 res = ComparisonResult() 388 389 # Map size_before -> size_after 390 path_difference_data: List[float] = [] 391 392 diags_old = get_grouped_diagnostics(results_old.diagnostics) 393 diags_new = get_grouped_diagnostics(results_new.diagnostics) 394 395 locations_old = set(diags_old.keys()) 396 locations_new = set(diags_new.keys()) 397 398 common_locations = locations_old & locations_new 399 400 for location in common_locations: 401 old = diags_old[location] 402 new = diags_new[location] 403 404 # Quadratic algorithms in this part are fine because 'old' and 'new' 405 # are most commonly of size 1. 406 common: Set[AnalysisDiagnostic] = set() 407 for a in old: 408 for b in new: 409 if a.get_issue_identifier() == b.get_issue_identifier(): 410 a_path_len = a.get_path_length() 411 b_path_len = b.get_path_length() 412 413 if a_path_len != b_path_len: 414 415 if histogram == HistogramType.RELATIVE: 416 path_difference_data.append(float(a_path_len) / b_path_len) 417 418 elif histogram == HistogramType.LOG_RELATIVE: 419 path_difference_data.append( 420 log(float(a_path_len) / b_path_len) 421 ) 422 423 elif histogram == HistogramType.ABSOLUTE: 424 path_difference_data.append(a_path_len - b_path_len) 425 426 res.add_common(b) 427 common.add(a) 428 429 old = filter_issues(old, common) 430 new = filter_issues(new, common) 431 common = set() 432 433 for a in old: 434 for b in new: 435 if a.is_similar_to(b): 436 res.add_changed(a, b) 437 common.add(a) 438 common.add(b) 439 440 old = filter_issues(old, common) 441 new = filter_issues(new, common) 442 443 # Whatever is left in 'old' doesn't have a corresponding diagnostic 444 # in 'new', so we need to mark it as 'removed'. 445 for a in old: 446 res.add_removed(a) 447 448 # Whatever is left in 'new' doesn't have a corresponding diagnostic 449 # in 'old', so we need to mark it as 'added'. 450 for b in new: 451 res.add_added(b) 452 453 only_old_locations = locations_old - common_locations 454 for location in only_old_locations: 455 for a in diags_old[location]: 456 # These locations have been found only in the old build, so we 457 # need to mark all of therm as 'removed' 458 res.add_removed(a) 459 460 only_new_locations = locations_new - common_locations 461 for location in only_new_locations: 462 for b in diags_new[location]: 463 # These locations have been found only in the new build, so we 464 # need to mark all of therm as 'added' 465 res.add_added(b) 466 467 # FIXME: Add fuzzy matching. One simple and possible effective idea would 468 # be to bin the diagnostics, print them in a normalized form (based solely 469 # on the structure of the diagnostic), compute the diff, then use that as 470 # the basis for matching. This has the nice property that we don't depend 471 # in any way on the diagnostic format. 472 473 if histogram: 474 from matplotlib import pyplot 475 476 pyplot.hist(path_difference_data, bins=100) 477 pyplot.show() 478 479 return res 480 481 482def filter_issues( 483 origin: List[AnalysisDiagnostic], to_remove: Set[AnalysisDiagnostic] 484) -> List[AnalysisDiagnostic]: 485 return [diag for diag in origin if diag not in to_remove] 486 487 488def compute_percentile(values: Sequence[T], percentile: float) -> T: 489 """ 490 Return computed percentile. 491 """ 492 return sorted(values)[int(round(percentile * len(values) + 0.5)) - 1] 493 494 495def derive_stats(results: AnalysisRun) -> Stats: 496 # Assume all keys are the same in each statistics bucket. 497 combined_data = defaultdict(list) 498 499 # Collect data on paths length. 500 for report in results.reports: 501 for diagnostic in report.diagnostics: 502 combined_data["PathsLength"].append(diagnostic.get_path_length()) 503 504 for stat in results.raw_stats: 505 for key, value in stat.items(): 506 combined_data[str(key)].append(value) 507 508 combined_stats: Stats = {} 509 510 for key, values in combined_data.items(): 511 combined_stats[key] = { 512 "max": max(values), 513 "min": min(values), 514 "mean": sum(values) / len(values), 515 "90th %tile": compute_percentile(values, 0.9), 516 "95th %tile": compute_percentile(values, 0.95), 517 "median": sorted(values)[len(values) // 2], 518 "total": sum(values), 519 } 520 521 return combined_stats 522 523 524# TODO: compare_results decouples comparison from the output, we should 525# do it here as well 526def compare_stats( 527 results_old: AnalysisRun, results_new: AnalysisRun, out: TextIO = sys.stdout 528): 529 stats_old = derive_stats(results_old) 530 stats_new = derive_stats(results_new) 531 532 old_keys = set(stats_old.keys()) 533 new_keys = set(stats_new.keys()) 534 keys = sorted(old_keys & new_keys) 535 536 for key in keys: 537 out.write(f"{key}\n") 538 539 nested_keys = sorted(set(stats_old[key]) & set(stats_new[key])) 540 541 for nested_key in nested_keys: 542 val_old = float(stats_old[key][nested_key]) 543 val_new = float(stats_new[key][nested_key]) 544 545 report = f"{val_old:.3f} -> {val_new:.3f}" 546 547 # Only apply highlighting when writing to TTY and it's not Windows 548 if out.isatty() and os.name != "nt": 549 if val_new != 0: 550 ratio = (val_new - val_old) / val_new 551 if ratio < -0.2: 552 report = Colors.GREEN + report + Colors.CLEAR 553 elif ratio > 0.2: 554 report = Colors.RED + report + Colors.CLEAR 555 556 out.write(f"\t {nested_key} {report}\n") 557 558 removed_keys = old_keys - new_keys 559 if removed_keys: 560 out.write(f"REMOVED statistics: {removed_keys}\n") 561 562 added_keys = new_keys - old_keys 563 if added_keys: 564 out.write(f"ADDED statistics: {added_keys}\n") 565 566 out.write("\n") 567 568 569def dump_scan_build_results_diff( 570 dir_old: ResultsDirectory, 571 dir_new: ResultsDirectory, 572 delete_empty: bool = True, 573 out: TextIO = sys.stdout, 574 show_stats: bool = False, 575 stats_only: bool = False, 576 histogram: Optional[HistogramType] = None, 577 verbose_log: Optional[str] = None, 578): 579 """ 580 Compare directories with analysis results and dump results. 581 582 :param delete_empty: delete empty plist files 583 :param out: buffer to dump comparison results to. 584 :param show_stats: compare execution stats as well. 585 :param stats_only: compare ONLY execution stats. 586 :param histogram: optional histogram type to plot path differences. 587 :param verbose_log: optional path to an additional log file. 588 """ 589 results_old = load_results(dir_old, delete_empty, verbose_log) 590 results_new = load_results(dir_new, delete_empty, verbose_log) 591 592 if show_stats or stats_only: 593 compare_stats(results_old, results_new) 594 if stats_only: 595 return 596 597 # Open the verbose log, if given. 598 if verbose_log: 599 aux_log: Optional[TextIO] = open(verbose_log, "w") 600 else: 601 aux_log = None 602 603 diff = compare_results(results_old, results_new, histogram) 604 found_diffs = 0 605 total_added = 0 606 total_removed = 0 607 total_modified = 0 608 609 for new in diff.present_only_in_new: 610 out.write(f"ADDED: {new.get_readable_name()}\n\n") 611 found_diffs += 1 612 total_added += 1 613 if aux_log: 614 aux_log.write( 615 f"('ADDED', {new.get_readable_name()}, " f"{new.get_html_report()})\n" 616 ) 617 618 for old in diff.present_only_in_old: 619 out.write(f"REMOVED: {old.get_readable_name()}\n\n") 620 found_diffs += 1 621 total_removed += 1 622 if aux_log: 623 aux_log.write( 624 f"('REMOVED', {old.get_readable_name()}, " f"{old.get_html_report()})\n" 625 ) 626 627 for old, new in diff.changed_between_new_and_old: 628 out.write(f"MODIFIED: {old.get_readable_name()}\n") 629 found_diffs += 1 630 total_modified += 1 631 diffs = old.get_diffs(new) 632 str_diffs = [ 633 f" '{key}' changed: " f"'{old_value}' -> '{new_value}'" 634 for key, (old_value, new_value) in diffs.items() 635 ] 636 out.write(",\n".join(str_diffs) + "\n\n") 637 if aux_log: 638 aux_log.write( 639 f"('MODIFIED', {old.get_readable_name()}, " 640 f"{old.get_html_report()})\n" 641 ) 642 643 total_reports = len(results_new.diagnostics) 644 out.write(f"TOTAL REPORTS: {total_reports}\n") 645 out.write(f"TOTAL ADDED: {total_added}\n") 646 out.write(f"TOTAL REMOVED: {total_removed}\n") 647 out.write(f"TOTAL MODIFIED: {total_modified}\n") 648 649 if aux_log: 650 aux_log.write(f"('TOTAL NEW REPORTS', {total_reports})\n") 651 aux_log.write(f"('TOTAL DIFFERENCES', {found_diffs})\n") 652 aux_log.close() 653 654 # TODO: change to NamedTuple 655 return found_diffs, len(results_old.diagnostics), len(results_new.diagnostics) 656 657 658if __name__ == "__main__": 659 print("CmpRuns.py should not be used on its own.") 660 print("Please use 'SATest.py compare' instead") 661 sys.exit(1) 662