xref: /llvm-project/openmp/runtime/tools/summarizeStats.py (revision 89c5576ff9038ba53025ca82209fdc5f5b5d0bb4)
1#!/usr/bin/env python
2
3
4import pandas as pd
5import numpy as np
6import re
7import sys
8import os
9import argparse
10import matplotlib
11from matplotlib import pyplot as plt
12from matplotlib.projections.polar import PolarAxes
13from matplotlib.projections import register_projection
14
15"""
16Read the stats file produced by the OpenMP runtime
17and produce a processed summary
18
19The radar_factory original code was taken from
20matplotlib.org/examples/api/radar_chart.html
21We added support to handle negative values for radar charts
22"""
23
def radar_factory(num_vars, frame='circle'):
    """Define and register the 'radar' projection and return its axis angles.

    A PolarAxes subclass named 'radar' is created and registered through
    register_projection, so that it can afterwards be requested with
    projection='radar'.

    Parameters:
        num_vars: number of axes (spokes) of the radar chart. A value of 0
            raises ZeroDivisionError when the angles are computed.
        frame: shape of the axes patch, either 'circle' or 'polygon'.

    Returns:
        A numpy array with num_vars evenly spaced angles, in radians.

    Raises:
        ValueError: if frame is neither 'circle' nor 'polygon'.
    """
    # calculate evenly-spaced axis angles
    theta = 2*np.pi * np.linspace(0, 1-1./num_vars, num_vars)
    # Rotating theta so that the first axis is at the top is disabled:
    #theta += np.pi/2

    def draw_poly_frame(self, x0, y0, r):
        """Return a polygon patch with one vertex per axis angle."""
        # TODO: use transforms to convert (x, y) to (r, theta)
        verts = [(r*np.cos(t) + x0, r*np.sin(t) + y0) for t in theta]
        return plt.Polygon(verts, closed=True, edgecolor='k')

    def draw_circle_frame(self, x0, y0, r):
        """Return a circular patch centred on (x0, y0) with radius r."""
        return plt.Circle((x0, y0), r)

    frame_dict = {'polygon': draw_poly_frame, 'circle': draw_circle_frame}
    if frame not in frame_dict:
        raise ValueError("unknown value for `frame`: %s" % frame)

    class RadarAxes(PolarAxes):
        """
        Class for creating a radar chart (a.k.a. a spider or star chart)

        http://en.wikipedia.org/wiki/Radar_chart
        """
        name = 'radar'
        # use 1 line segment to connect specified points
        RESOLUTION = 1
        # define draw_frame method
        draw_frame = frame_dict[frame]

        def fill(self, *args, **kwargs):
            """Override fill so that line is closed by default"""
            closed = kwargs.pop('closed', True)
            return super(RadarAxes, self).fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Plot through the parent class and return the created lines.

            The lines are NOT closed back to their first point: the closing
            loop below is disabled, and this class does not define
            _close_line.
            """
            lines = super(RadarAxes, self).plot(*args, **kwargs)
            #for line in lines:
            #    self._close_line(line)
            # Hand the lines back to the caller instead of dropping them.
            return lines

        def set_varlabels(self, labels):
            """Label each axis angle (converted to degrees) with labels."""
            self.set_thetagrids(theta * 180/np.pi, labels,fontsize=14)

        def _gen_axes_patch(self):
            """Build the axes patch with the frame chosen at factory time."""
            x0, y0 = (0.5, 0.5)
            r = 0.5
            return self.draw_frame(x0, y0, r)

    register_projection(RadarAxes)
    return theta
76
77# Code to read the raw stats
def extractSI(s):
    """Turn a string such as "1.5 k" or "3 ms" into a scaled float.

    The first whitespace-separated token is the number. When there are
    exactly two tokens, the first character of the second one is taken as
    an SI prefix; otherwise the number is returned unscaled. An unknown
    prefix raises KeyError.
    """
    tokens = s.split()
    value = float(tokens[0])
    prefix = tokens[1][0] if len(tokens) == 2 else ' '
    # http://physics.nist.gov/cuu/Units/prefixes.html
    # Prefixes that enlarge the value (' ' stands for "no prefix").
    multipliers = {
        'Y': 1e24,
        'Z': 1e21,
        'E': 1e18,
        'P': 1e15,
        'T': 1e12,
        'G': 1e9,
        'M': 1e6,
        'k': 1e3,
        ' ': 1,
    }
    if prefix in multipliers:
        return value * multipliers[prefix]
    # Prefixes that shrink the value are stored as the power to divide by.
    # A single divide is used rather than multiplying by 1/x, which would
    # introduce two roundings since 1/10 is not exactly representable in
    # IEEE floating point.
    divisors = {
        'm': 1e3,
        'u': 1e6,
        'n': 1e9,
        'p': 1e12,
        'f': 1e15,
        'a': 1e18,
        'z': 1e21,
        'y': 1e24,
    }
    return value / divisors[prefix]
107
def readData(f):
    """Read one comma-separated statistics table from f into a DataFrame.

    The first line read is the header; its first field is dropped and the
    remaining fields become the column names. Following lines are read
    until a blank line or end of file. Lines starting with '#' are skipped.
    In every other line the first field becomes the row label and the
    remaining fields are converted with extractSI.

    Parameters:
        f: an open text file positioned at the table's header line.

    Returns:
        A pandas DataFrame indexed by row label, rows in file order.
    """
    header = f.readline()
    fieldnames = [x.strip() for x in header.split(',')]
    labels = []
    rows = []
    line = f.readline().strip()
    while line != "":
        if line[0] != '#':
            fields = line.split(',')
            labels.append(fields[0].strip())
            rows.append([extractSI(v) for v in fields[1:]])
        line = f.readline().strip()
    # DataFrame.from_items no longer exists in current pandas, so the frame
    # is built with the plain constructor, which keeps row order.
    return pd.DataFrame(rows, index=labels, columns=fieldnames[1:])
123
def readTimers(f):
    """Skip the preamble of a stats file and read the timer table.

    Leading lines that start with '#' are skipped, then the first
    non-comment line and the line after it are discarded, and the table
    that follows is parsed by readData.

    Parameters:
        f: an open text file positioned at the start of the stats output.

    Returns:
        The DataFrame produced by readData.

    Raises:
        ValueError: if the file ends before any non-comment line is found.
    """
    line = f.readline()
    # startswith() is safe at end of file, where readline() returns ''
    # (indexing line[0] would raise IndexError there).
    while line.startswith('#'):
        line = f.readline()
    if line == "":
        raise ValueError("no timer statistics found before end of file")
    # The previous test here was
    #     line == "Statistics on exit\n" or "Aggregate for all threads\n"
    # which is always true (a non-empty string literal is truthy), so one
    # further line has always been discarded unconditionally. That behaviour
    # is kept and made explicit.
    # NOTE(review): presumably the two discarded lines are the
    # "Statistics on exit" and "Aggregate for all threads" headings --
    # confirm against the runtime's output format.
    f.readline()
    return readData(f)
133
def readCounters(f):
    """Read the counter table from f; it is parsed exactly like any other
    table, so the work is delegated to readData."""
    counters = readData(f)
    return counters
137
def readFile(fname):
    """Parse the statistics file fname.

    Returns a dict with the keys "timers" and "counters". If opening or
    reading the file raises OSError/IOError, "Cannot open <fname>" is
    printed and None is returned instead.
    """
    try:
        with open(fname) as statsFile:
            timers = readTimers(statsFile)
            counters = readCounters(statsFile)
    except (OSError, IOError):
        print("Cannot open " + fname)
        return None
    return {"timers": timers, "counters": counters}
149
def usefulValues(l):
    """Return a list of flags, one per entry of l, that are true where the
    entry is neither null (per pd.notnull) nor equal to 0.0.

    l must support element-wise comparison (it is compared with `!= 0.0`
    and the result is iterated).
    """
    presentFlags = pd.notnull(l)
    nonZeroFlags = l != 0.0
    flags = []
    for isPresent, isNonZero in zip(presentFlags, nonZeroFlags):
        flags.append(isPresent and isNonZero)
    return flags
153
def uselessValues(l):
    """Return the element-wise negation of usefulValues(l): True where the
    entry is null or zero."""
    flags = []
    for useful in usefulValues(l):
        flags.append(not useful)
    return flags
157
# Statistic groups handled by main(), in the order they are processed.
interestingStats = ("counters", "timers")

# Per statistic group: (y-axis label, chart title).
statProperties = dict(
    counters=("Count", "Counter Statistics"),
    timers=("Time (ticks)", "Timer Statistics"),
)
162
def drawChart(data, kind, filebase):
    """Plot the "Mean" column of data as a log-scale bar chart, with the
    "SD" column as error bars, and save it as <filebase>_<kind>.

    kind selects the y-axis label and the title from statProperties.
    """
    means = data["Mean"]
    deviations = data["SD"]
    means.plot(kind="bar", logy=True, grid=True, colormap="GnBu",
               yerr=deviations, ecolor="black")
    plt.xlabel("OMP Constructs")
    axisLabel, chartTitle = statProperties[kind]
    plt.ylabel(axisLabel)
    plt.title(chartTitle)
    plt.tight_layout()
    outputName = filebase + "_" + kind
    plt.savefig(outputName)
172
def normalizeValues(data, countField, factor):
    """Divide, in place, every column of data except countField by factor,
    turning the values into rates. Nothing is returned."""
    rateColumns = []
    for column in data.keys():
        if column != countField:
            rateColumns.append(column)
    data[rateColumns] /= factor
176
177
def setRadarFigure(titles):
    """Create a 9x9 figure holding one 'radar' axes labelled with titles.

    Returns a dict with the axes under 'ax' and the axis angles returned by
    radar_factory under 'theta'. The "Linear->Log" marker is placed at the
    third angle, so at least three titles are needed.
    """
    axesRect = [0.1, 0.1, 0.8, 0.8]
    radialTicks = [0.2, 0.4, 0.6, 0.8, 1, 2, 3, 4, 5, 10]
    figure = plt.figure(figsize=(9,9))
    matplotlib.rcParams.update({'font.size':13})
    # Registers the 'radar' projection requested by add_axes below.
    angles = radar_factory(len(titles))
    radarAxes = figure.add_axes(axesRect, projection='radar')
    radarAxes.set_rgrids(radialTicks)
    radarAxes.set_varlabels(titles)
    radarAxes.text(angles[2], 1, "Linear->Log",
                   horizontalalignment='center', color='green', fontsize=18)
    return dict(ax=radarAxes, theta=angles)
190
191
def drawRadarChart(data, kind, filebase, params, color):
    """Draw data on the radar axes in params as two series.

    Entries that are >= 1 go, as log10, into the "log" series (blue). All
    other entries go unchanged into the "linear" series (red). Each series
    is zero wherever the other one holds the value. The radial limit is set
    to the ceiling of the largest log value. The color argument is accepted
    but not used.
    """
    linearPart = data * 0
    logPart = data * 0
    for name in data.keys():
        if data[name] >= 1:
            logPart[name] = np.log10(data[name])
            continue
        linearPart[name] = data[name]
    axes = params['ax']
    angles = params['theta']
    labelBase = filebase + "_" + kind
    axes.plot(angles, logPart, color='b', label=labelBase + "_log")
    axes.plot(angles, linearPart, color='r', label=labelBase + "_linear")
    axes.legend(loc='best', bbox_to_anchor=(1.4,1.2))
    upperLimit = np.ceil(max(logPart))
    axes.set_rlim((0, upperLimit))
205
def multiAppBarChartSettings(ax, plt, index, width, n, tmp, s):
    """Apply the common settings of a multi-application bar chart.

    The y axis of ax becomes logarithmic, a legend is added, and the x
    ticks are placed at index + width * n / 2 and labelled with the keys of
    tmp[s]['Total']. The axis labels and title come from statProperties[s]
    and are set through the plt object that is passed in.
    """
    ax.set_yscale('log')
    ax.legend()
    tickPositions = index + width * n / 2
    ax.set_xticks(tickPositions)
    constructNames = tmp[s]['Total'].keys()
    ax.set_xticklabels(constructNames, rotation=50,
                       horizontalalignment='right')
    plt.xlabel("OMP Constructs")
    axisLabel, chartTitle = statProperties[s]
    plt.ylabel(axisLabel)
    plt.title(chartTitle)
    plt.tight_layout()
215
def derivedTimerStats(data):
    """Express timers as a percentage of OMP_worker_thread_life.

    Parameters:
        data: mapping (dict or pandas Series) from timer name to value.

    Returns:
        A dict mapping each processed timer name to
        100 * value / data['OMP_worker_thread_life']. Processing stops at
        the first of the keys listed in stopKeys; neither that key nor any
        key after it appears in the result.

    Raises:
        KeyError: if a percentage has to be computed but data has no
            'OMP_worker_thread_life' entry.
    """
    lifeKey = 'OMP_worker_thread_life'
    stopKeys = ('FOR_static_iterations', 'OMP_PARALLEL_args',
                'OMP_set_numthreads', 'FOR_dynamic_iterations')
    # Fetch the total once, up front, so that the result no longer depends
    # on OMP_worker_thread_life being iterated before every other timer.
    totalRuntime = data[lifeKey] if lifeKey in data.keys() else None
    stats = {}
    for key in data.keys():
        if key == lifeKey:
            continue
        if key in stopKeys:
            # NOTE(review): this stops the whole loop rather than skipping
            # just this key; kept as is -- confirm that is intended.
            break
        if totalRuntime is None:
            raise KeyError(lifeKey)
        stats[key] = 100 * data[key] / totalRuntime
    return stats
227
def compPie(data):
    """Split data into compute and non-compute entries.

    Entries whose key is one of the names listed below go into the first
    dict, all others into the second. Both dicts are printed and returned
    as [compute, nonCompute].
    """
    computeNames = ('OMP_critical', 'OMP_single', 'OMP_serial',
                    'OMP_parallel', 'OMP_master', 'OMP_task_immediate',
                    'OMP_task_taskwait', 'OMP_task_taskyield',
                    'OMP_task_taskgroup', 'OMP_task_join_bar',
                    'OMP_task_plain_bar')
    computePart = {}
    otherPart = {}
    for name in data.keys():
        bucket = computePart if name in computeNames else otherPart
        bucket[name] = data[name]
    print("comp keys:", computePart, "\n\n non comp keys:", otherPart)
    return [computePart, otherPart]
241
def drawMainPie(data, filebase, colors):
    """Draw the compute versus non-compute pie and save it as
    <filebase>_main_pie.

    data is a pair of dicts; the values of data[0] are summed into the
    "Compute" slice and those of data[1] into the "Non Compute" slice.
    """
    computeTotal = sum(data[0].values())
    otherTotal = sum(data[1].values())
    sizes = [computeTotal, otherTotal]
    labels = ["Compute - %.2f" % computeTotal,
              "Non Compute - %.2f" % otherTotal]
    patches = plt.pie(sizes, explode=[0, 0], colors=colors, startangle=90)
    plt.title("Time Division")
    plt.axis('equal')
    wedges = patches[0]
    plt.legend(wedges, labels, loc='best', bbox_to_anchor=(-0.1,1),
               fontsize=16)
    plt.savefig(filebase+"_main_pie", bbox_inches='tight')
251
def drawSubPie(data, tag, filebase, colors):
    """Draw a pie chart of the entries of data and save it as
    <filebase>_<tag>.

    Parameters:
        data: dict mapping a name to its size.
        tag: used in the chart title and in the output file name.
        filebase: prefix of the output file name.
        colors: colours handed to plt.pie.

    Each legend entry is "<name> - <percentage of the sum of all sizes>";
    the title reports the sum of all sizes.
    """
    # dict views can be neither indexed nor assigned to on Python 3, so
    # materialise them as lists before building the labels.
    names = list(data.keys())
    sizes = list(data.values())
    total = sum(sizes)
    labels = [name + " - %.2f" % (100 * size / total)
              for name, size in zip(names, sizes)]
    explode = [0] * len(sizes)
    patches = plt.pie(sizes, explode=explode, colors=colors, startangle=90)
    plt.title(tag+"(Percentage of Total:"+" %.2f" % total+")")
    plt.tight_layout()
    plt.axis('equal')
    plt.legend(patches[0], labels, loc='best', bbox_to_anchor=(-0.1,1), fontsize=16)
    plt.savefig(filebase+"_"+tag, bbox_inches='tight')
268
def main():
    """Parse the command line and summarise every stats file given on it.

    For each statistic group in interestingStats and each input file:
      * "counters": the counters are normalised by the mean
        OMP_worker_thread_life (divided by 1e9) and a radar chart is saved
        as <filebase>_counters_radar.
      * "timers": rows whose name starts with "Total" are dropped, the
        derived percentages are computed, the main pie and the two sub pies
        are saved, and the percentages are written to
        derivedStats_<name>.csv next to the input file.

    Files that readFile cannot read are skipped (readFile reports them).
    """
    parser = argparse.ArgumentParser(description='''This script takes a list
        of files containing each of which contain output from a stats-gathering
        enabled OpenMP runtime library.  Each stats file is read, parsed, and
        used to produce a summary of the statistics''')
    parser.add_argument('files', nargs='+',
        help='files to parse which contain stats-gathering output')
    command_args = parser.parse_args()
    colors = ['orange', 'b', 'r', 'yellowgreen', 'lightsage', 'lightpink',
              'green', 'purple', 'yellow', 'cyan', 'mediumturquoise',
              'olive']
    stats = {}
    matplotlib.rcParams.update({'font.size':22})
    for s in interestingStats:
        # Opens a new figure for this statistic group.
        plt.subplots()

        for n, f in enumerate(command_args.files):
            filebase = os.path.splitext(f)[0]
            tmp = readFile(f)
            if tmp is None:
                # readFile already printed why; carry on with the next file
                # instead of failing on tmp[s].
                continue
            data = tmp[s]['Total']
            if s == 'counters':
                elapsedTime = tmp["timers"]["Mean"]["OMP_worker_thread_life"]
                normalizeValues(tmp["counters"], "SampleCount",
                    elapsedTime / 1.e9)
                # Plotting radar charts
                params = setRadarFigure(data.keys())
                chartType = "radar"
                # Wrap around so more input files than colours cannot
                # raise IndexError.
                drawRadarChart(data, s, filebase, params,
                               colors[n % len(colors)])
                # radar charts finish here
                plt.savefig(filebase + "_" + s + "_" + chartType, bbox_inches="tight")
            elif s == "timers":
                print("overheads in " + filebase)
                # Prevent repetition by removing rows such as
                # Total_OMP_work, as Total_OMP_work['Total'] is the same as
                # OMP_work['Total']. Iterate over a copy of the keys because
                # entries are deleted inside the loop.
                for key in list(data.keys()):
                    if key[0:5] == 'Total':
                        del data[key]
                stats[filebase] = derivedTimerStats(data)
                dataSubSet = compPie(stats[filebase])
                drawMainPie(dataSubSet, filebase, colors)
                plt.figure(0)
                drawSubPie(dataSubSet[0], "Computational Time", filebase, colors)
                plt.figure(1)
                drawSubPie(dataSubSet[1], "Non Computational Time", filebase, colors)
                # Put the prefix in front of the file name, not in front of
                # the whole path, so inputs given with a directory part
                # still yield a usable path.
                csvName = os.path.join(
                    os.path.dirname(filebase),
                    'derivedStats_{}.csv'.format(os.path.basename(filebase)))
                with open(csvName, 'w') as out:
                    out.write('================={}====================\n'.format(filebase))
                    out.write(pd.DataFrame(list(stats[filebase].items())).to_csv()+'\n')
    plt.close()
322
# Run the summary only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
325