1"""
2Static Analyzer qualification infrastructure.
3
4This source file contains all the functionality related to benchmarking
5the analyzer on a set projects.  Right now, this includes measuring
6execution time and peak memory usage.  Benchmark runs analysis on every
7project multiple times to get a better picture about the distribution
8of measured values.
9
10Additionally, this file includes a comparison routine for two benchmarking
11results that plots the result together on one chart.
12"""

import SATestUtils as utils
from SATestBuild import ProjectTester, stdout, TestInfo
from ProjectMap import ProjectInfo

import pandas as pd
from typing import List, Tuple


INDEX_COLUMN = "index"


def _save(data: pd.DataFrame, file_path: str):
    data.to_csv(file_path, index_label=INDEX_COLUMN)


def _load(file_path: str) -> pd.DataFrame:
    return pd.read_csv(file_path, index_col=INDEX_COLUMN)


class Benchmark:
    """
    Benchmark encapsulates one functionality: it runs the analysis
    multiple times for the given set of projects and stores the results
    in the specified file.
    """

    def __init__(self, projects: List[ProjectInfo], iterations: int, output_path: str):
        self.projects = projects
        self.iterations = iterations
        self.out = output_path

    def run(self):
        results = [self._benchmark_project(project) for project in self.projects]

        data = pd.concat(results, ignore_index=True)
        _save(data, self.out)

    def _benchmark_project(self, project: ProjectInfo) -> pd.DataFrame:
        if not project.enabled:
            stdout(f" \n\n--- Skipping disabled project {project.name}\n")
            # Returning None is fine here: pd.concat in run() silently drops
            # None entries.
            return

        stdout(f" \n\n--- Benchmarking project {project.name}\n")

        test_info = TestInfo(project)
        tester = ProjectTester(test_info, silent=True)
        project_dir = tester.get_project_dir()
        output_dir = tester.get_output_dir()

        raw_data = []

        for i in range(self.iterations):
            stdout(f"Iteration #{i + 1}")
            time, mem = tester.build(project_dir, output_dir)
            raw_data.append(
                {"time": time, "memory": mem, "iteration": i, "project": project.name}
            )
            stdout(
                f"time: {utils.time_to_str(time)}, "
                f"peak memory: {utils.memory_to_str(mem)}"
            )

        return pd.DataFrame(raw_data)


def compare(old_path: str, new_path: str, plot_file: str):
    """
    Compare two benchmarking results stored as .csv files
    and produce a plot in the specified file.
    """
    old = _load(old_path)
    new = _load(new_path)

    old_projects = set(old["project"])
    new_projects = set(new["project"])
    common_projects = old_projects & new_projects

    # Leave only rows for projects common to both dataframes.
    old = old[old["project"].isin(common_projects)]
    new = new[new["project"].isin(common_projects)]

    old, new = _normalize(old, new)

    # Seaborn prefers all the data to be in one dataframe.
    old["kind"] = "old"
    new["kind"] = "new"
    data = pd.concat([old, new], ignore_index=True)

    # TODO: compare data in old and new dataframes using statistical tests
    #       to check if they belong to the same distribution
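    #
    # A possible sketch of such a check (not wired in; illustrative only and
    # assumes scipy is available):
    #
    #     from scipy.stats import mannwhitneyu
    #
    #     for project in common_projects:
    #         old_times = old[old["project"] == project]["time"]
    #         new_times = new[new["project"] == project]["time"]
    #         _, p_value = mannwhitneyu(old_times, new_times)
    #         stdout(f"{project}: time p-value = {p_value:.3f}")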
    _plot(data, plot_file)


def _normalize(
    old: pd.DataFrame, new: pd.DataFrame
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    # Average all numerical data per project.  Both the old and the new
    # results are normalized against the old means, so that they end up
    # on the same scale and can be compared directly.
    means = old.groupby("project").mean()
    return _normalize_impl(old, means), _normalize_impl(new, means)


def _normalize_impl(data: pd.DataFrame, means: pd.DataFrame):
    # Right now 'means' has one row per project, while 'data' has N rows
    # for each project (one for each iteration).
    #
    # To make this data easier to work with, we duplicate the 'means' data
    # to match the size of the 'data' dataframe.
    #
    # All the columns from 'data' keep their names, while the new columns
    # coming from 'means' get a "_mean" suffix.
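    #
    # For example (hypothetical numbers): if a project was analyzed with
    # times of 10, 12, and 8 seconds, its mean is 10, and the normalized
    # times computed below come out as 1.0, 1.2, and 0.8.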
    joined_data = data.merge(means, on="project", suffixes=("", "_mean"))
    _normalize_key(joined_data, "time")
    _normalize_key(joined_data, "memory")
    return joined_data


def _normalize_key(data: pd.DataFrame, key: str):
    norm_key = _normalized_name(key)
    mean_key = f"{key}_mean"
    data[norm_key] = data[key] / data[mean_key]


def _normalized_name(name: str) -> str:
    return f"normalized {name}"


def _plot(data: pd.DataFrame, plot_file: str):
    import matplotlib
    import seaborn as sns
    from matplotlib import pyplot as plt

    sns.set_style("whitegrid")
    # We want to have time and memory charts one above the other.
    figure, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 6))

    def _subplot(key: str, ax: matplotlib.axes.Axes):
        sns.boxplot(
            x="project",
            y=_normalized_name(key),
            hue="kind",
            data=data,
            palette=sns.color_palette("BrBG", 2),
            ax=ax,
        )

    _subplot("time", ax1)
    # No need to have xlabels on both top and bottom charts.
    ax1.set_xlabel("")

    _subplot("memory", ax2)
    # The legend on the top chart is enough.
    ax2.get_legend().remove()

    figure.savefig(plot_file)