xref: /llvm-project/libc/benchmarks/libc-benchmark-analysis.py3 (revision deae7e982a3b08996455e2cdfdc5062bf37895a3)
1"""Reads JSON files produced by the benchmarking framework and renders them.
2
3Installation:
4> apt-get install python3-pip
5> pip3 install matplotlib pandas seaborn
6
7Run:
8> python3 libc/benchmarks/libc-benchmark-analysis.py3 <files>
9"""
10
11import argparse
12import json
13import pandas as pd
14import seaborn as sns
15import matplotlib.pyplot as plt
16from matplotlib.ticker import EngFormatter
17
def formatUnit(value, unit):
    """Render `value` as text using matplotlib's EngFormatter.

    Args:
        value: The number to format.
        unit: The unit symbol handed to EngFormatter (e.g. "B" or "Hz").

    Returns:
        The string produced by `EngFormatter.format_data`, built with an
        empty separator so nothing is inserted between number and unit.
    """
    formatter = EngFormatter(unit=unit, sep="")
    return formatter.format_data(value)
20
def formatCache(cache):
    """Summarize one cache description as a compact string.

    Args:
        cache: Mapping with the keys "Type", "Level", "Size" and "NumSharing".

    Returns:
        A string of the form `<t>L<level>:<size>/<sharing>` where `<t>` is the
        lower-cased first character of the cache type and `<size>` is the
        cache size formatted in bytes by `formatUnit`.
    """
    # Only the initial of the type is kept to keep the summary short.
    type_initial = cache["Type"][0].lower()
    cache_level = cache["Level"]
    cache_size = formatUnit(cache["Size"], "B")
    sharing = cache["NumSharing"]
    return f'{type_initial}L{cache_level}:{cache_size}/{sharing}'
27
def getCpuFrequency(study):
    """Return the "CpuFrequency" entry of the study's runtime host section."""
    host = study["Runtime"]["Host"]
    return host["CpuFrequency"]
30
def getId(study):
    """Build the identifier used to group studies and to title their plot.

    The identifier spans two lines: the first holds the CPU name, the
    formatted CPU frequency and a " (Sweep)" marker when the study's
    configuration has "IsSweepMode" set; the second lists the host caches,
    comma separated, each rendered by `formatCache`.

    Args:
        study: A parsed benchmark study with "Runtime" and "Configuration"
            sections.

    Returns:
        The two-line identifier string.
    """
    host = study["Runtime"]["Host"]
    cpu_name = host["CpuName"]
    cpu_frequency = formatUnit(getCpuFrequency(study), "Hz")
    if study["Configuration"]["IsSweepMode"]:
        sweep_marker = " (Sweep)"
    else:
        sweep_marker = ""
    cache_summary = ", ".join(map(formatCache, host["Caches"]))
    return f'{cpu_name} {cpu_frequency}{sweep_marker}\n{cache_summary}'
37
def getFunction(study):
    """Return the "Function" entry of the study's configuration section."""
    configuration = study["Configuration"]
    return configuration["Function"]
40
def getLabel(study):
    """Return the plot label of a study: its function and its "StudyName"."""
    function_name = getFunction(study)
    study_name = study["StudyName"]
    return f'{function_name} {study_name}'
43
def displaySweepData(id, studies, mode):
    """Plot sweep-mode studies as one line per study against the size.

    Each study's flat "Measurements" list is indexed by (size, trial), with
    sizes running from 0 to "SweepModeMaxSize" inclusive and all trials of a
    size stored consecutively. The trial level is then dropped so that every
    size carries several samples, which seaborn aggregates per size.

    Args:
        id: Title of the plot (the group identifier).
        studies: Parsed studies that belong to the same group.
        mode: "time" plots the raw measurements, "cycles" scales them by the
            CPU frequency, "bytespercycle" divides the size by the cycles.
    """
    frame = None
    for study in studies:
        samples = study["Measurements"]
        configuration = study["Configuration"]
        size_count = configuration["SweepModeMaxSize"] + 1
        trial_count = configuration["NumTrials"]
        assert trial_count * size_count == len(samples), 'not a multiple of NumSizes'
        sample_index = pd.MultiIndex.from_product(
            [range(size_count), range(trial_count)], names=['size', 'trial'])
        label = getLabel(study)
        if frame is None:
            frame = pd.DataFrame(samples, index=sample_index, columns=[label])
        else:
            frame[label] = pd.Series(samples, index=sample_index)
    frame = frame.reset_index(level='trial', drop=True)

    # Both cycle-based modes start by converting time to cycles. `study` is
    # the last study of the loop above; it supplies the frequency.
    if mode in ("cycles", "bytespercycle"):
        frame *= getCpuFrequency(study)
    if mode == "bytespercycle":
        # The index holds the size of each sample, so size / cycles yields
        # the throughput.
        sizes = pd.Series(data=frame.index, index=frame.index)
        for column in frame.columns:
            frame[column] = sizes.divide(frame[column])

    formatter_unit, axis_label = {
        "time": ("s", "Time"),
        "cycles": ("", "Cycles"),
        "bytespercycle": ("B/cycle", "Byte/cycle"),
    }[mode]

    graph = sns.lineplot(data=frame, palette="muted", ci=95)
    graph.set_title(id)
    y_axis = graph.yaxis
    y_axis.set_major_formatter(EngFormatter(unit=formatter_unit))
    y_axis.set_label_text(axis_label)
    x_axis = graph.xaxis
    x_axis.set_major_formatter(EngFormatter(unit="B"))
    x_axis.set_label_text("Copy Size")
    plt.xticks(rotation=90)
    plt.show()
74
def displayDistributionData(id, studies, mode):
    """Plot distribution-mode studies as violins grouped by size distribution.

    Every study contributes one row per measurement, tagged with the study's
    "SizeDistributionName" and label, plus a "cycles" column obtained by
    multiplying the measured time by the CPU frequency.

    Args:
        id: Title of the plot (the group identifier).
        studies: Parsed studies that belong to the same group.
        mode: "time" or "cycles". "bytespercycle" is not supported here: a
            notice is printed and "time" is plotted instead.
    """
    distributions = set()
    frames = []
    for study in studies:
        distribution = study["Configuration"]["SizeDistributionName"]
        distributions.add(distribution)
        local = pd.DataFrame(study["Measurements"], columns=["time"])
        local["distribution"] = distribution
        local["label"] = getLabel(study)
        local["cycles"] = local["time"] * getCpuFrequency(study)
        frames.append(local)
    # DataFrame.append was removed in pandas 2.0. Concatenating once at the
    # end also avoids re-copying the accumulated rows for every study. The
    # per-study row numbers carry no meaning, so a fresh index is used.
    df = pd.concat(frames, ignore_index=True)
    if mode == "bytespercycle":
        mode = "time"
        print("`--mode=bytespercycle` is ignored for distribution mode reports")
    FormatterUnit = {"time": "s", "cycles": ""}[mode]
    Label = {"time": "Time", "cycles": "Cycles"}[mode]
    graph = sns.violinplot(
        data=df,
        x="distribution",
        y=mode,
        palette="muted",
        hue="label",
        # Sorted so the distributions appear in a stable order on the x axis.
        order=sorted(distributions),
    )
    graph.set_title(id)
    graph.yaxis.set_major_formatter(EngFormatter(unit=FormatterUnit))
    graph.yaxis.set_label_text(Label)
    plt.xticks(rotation=90)
    plt.show()
100
101
def main():
    """Parse the command line, group the studies and display each group.

    Every JSON file given on the command line is loaded as one study. Studies
    are grouped by the identifier returned by `getId`; each group is then
    rendered either as a sweep plot or as a distribution plot.
    """
    parser = argparse.ArgumentParser(description="Process benchmark json files.")
    parser.add_argument("--mode", choices=["time", "cycles", "bytespercycle"], default="time", help="Use to display either 'time', 'cycles' or 'bytes/cycle'.")
    parser.add_argument("files", nargs="+", help="The json files to read from.")

    args = parser.parse_args()
    study_groups = {}
    for path in args.files:
        # Binary mode lets the json module detect the UTF-8/16/32 encoding
        # itself instead of decoding with the platform's locale encoding.
        with open(path, "rb") as json_file:
            study = json.load(json_file)
        study_groups.setdefault(getId(study), []).append(study)

    # NOTE(review): this runs before any axes exist, so it probably has no
    # effect on the plots drawn below - confirm before relying on it.
    plt.tight_layout()
    sns.set_theme(style="ticks")
    for group_id, study_collection in study_groups.items():
        # getId() adds the "(Sweep)" marker for studies run in sweep mode.
        if "(Sweep)" in group_id:
            displaySweepData(group_id, study_collection, args.mode)
        else:
            displayDistributionData(group_id, study_collection, args.mode)
125
126
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
129