xref: /llvm-project/llvm/utils/llvm-locstats/llvm-locstats.py (revision be031b17191efb32636a05d2e39433446c328d92)
1#!/usr/bin/env python3
2#
# This tool is a debug location coverage calculator.
# It parses the output of llvm-dwarfdump --statistics and reports it
# in a more human-readable way.
6#
7
8from __future__ import print_function
9import argparse
10import os
11import sys
12from json import loads
13from math import ceil
14from collections import OrderedDict
15from subprocess import Popen, PIPE
16
# Marker stored in place of a numeric statistic whose JSON value was reported
# as "overflowed". Code that does arithmetic on a statistic compares against
# this marker first and propagates it instead of computing a number.
TAINT_VALUE = "tainted"
19
# Apply the shared title, axis labels and tick styling to a plot.
# "plt" is used through its title/xlabel/ylabel/xticks/yticks functions
# (callers in this file pass matplotlib.pyplot).
def init_plot(plt):
    plt.title("Debug Location Statistics", fontweight="bold")

    axis_labels = (
        (plt.xlabel, "location buckets"),
        (plt.ylabel, "number of variables in the location buckets"),
    )
    for set_label, text in axis_labels:
        set_label(text)

    # Slanted, small labels on the x axis; default ticks on the y axis.
    x_tick_style = dict(rotation=45, fontsize="x-small")
    plt.xticks(**x_tick_style)
    plt.yticks()
27
28
# Add the legend and horizontal grid lines, write the figure to
# "locstats.png" in the current directory and tell the user about it.
def finish_plot(plt):
    plt.legend()

    grid_style = {
        "color": "grey",
        "which": "major",
        "axis": "y",
        "linestyle": "-",
        "linewidth": 0.3,
    }
    plt.grid(**grid_style)

    plt.savefig("locstats.png")
    print('The plot was saved within "locstats.png".')
35
36
# Holds the debug location statistics of one input file and knows how to
# report them, either as text or as a matplotlib plot.
#
# Any numeric field may hold TAINT_VALUE instead of a number; percentage
# computations propagate that marker instead of doing arithmetic on it.
class LocationStats:
    # file_name: label used for this data set in plots.
    # variables_total / variables_with_loc: overall counts; when either is
    #   None (or zero) pretty_print() skips the "total availability" section.
    # variables_total_locstats: number of variables the buckets refer to.
    # variables_scope_bytes_covered / variables_scope_bytes: numerator and
    #   denominator of the PC ranges coverage.
    # variables_coverage_map: ordered mapping from coverage bucket label to
    #   the number of variables in that bucket.
    def __init__(
        self,
        file_name,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    ):
        self.file_name = file_name
        self.variables_total = variables_total
        self.variables_total_locstats = variables_total_locstats
        self.variables_with_loc = variables_with_loc
        self.scope_bytes_covered = variables_scope_bytes_covered
        self.scope_bytes = variables_scope_bytes
        self.variables_coverage_map = variables_coverage_map

    # Return "part" as an integer percentage of "whole".
    #
    # Returns TAINT_VALUE if either operand is tainted, and 0 if "whole" is
    # zero (there is nothing to divide by). For integer counts ceil() of the
    # scaled numerator is a no-op and int() truncates the quotient, so the
    # result is rounded down; the expression is kept as-is so that reported
    # numbers do not change.
    @staticmethod
    def _percentage(part, whole):
        if part == TAINT_VALUE or whole == TAINT_VALUE:
            return TAINT_VALUE
        if whole == 0:
            return 0
        return int(ceil(part * 100.0) / whole)

    # Get the PC ranges coverage as an integer percentage, TAINT_VALUE if
    # one of the byte counts overflowed, or 0 when there are no scope bytes.
    def get_pc_coverage(self):
        return self._percentage(self.scope_bytes_covered, self.scope_bytes)

    # Pretty print the debug location buckets on the standard output.
    # Returns -1 (after printing a notice) when there are no scope bytes to
    # report on, and 0 otherwise.
    def pretty_print(self):
        if self.scope_bytes == 0:
            print("No scope bytes found.")
            return -1

        pc_ranges_covered = self.get_pc_coverage()

        # Percentage of the processed variables that fall in each bucket.
        variables_coverage_per_map = {}
        for cov_bucket in coverage_buckets():
            variables_coverage_per_map[cov_bucket] = self._percentage(
                self.variables_coverage_map[cov_bucket],
                self.variables_total_locstats,
            )

        print(" =================================================")
        print("            Debug Location Statistics       ")
        print(" =================================================")
        print("     cov%           samples         percentage(~)  ")
        print(" -------------------------------------------------")
        for cov_bucket in coverage_buckets():
            samples = self.variables_coverage_map[cov_bucket]
            percentage = variables_coverage_per_map[cov_bucket]
            # Tainted rows carry strings, so they cannot use the integer
            # ("d") format specifiers.
            if (
                samples == TAINT_VALUE
                or self.variables_total_locstats == TAINT_VALUE
            ):
                row_format = "   {0:10}     {1:8}              {2:3}%"
            else:
                row_format = "   {0:10}     {1:8d}              {2:3d}%"
            print(row_format.format(cov_bucket, samples, percentage))
        print(" =================================================")
        print(
            " -the number of debug variables processed: "
            + str(self.variables_total_locstats)
        )
        print(" -PC ranges covered: " + str(pc_ranges_covered) + "%")

        # Only if we are processing all the variables output the total
        # availability.
        if self.variables_total and self.variables_with_loc:
            total_availability = self._percentage(
                self.variables_with_loc, self.variables_total
            )
            print(" -------------------------------------------------")
            print(" -total availability: " + str(total_availability) + "%")
        print(" =================================================")

        return 0

    # Draw a bar plot representing the location buckets and save it through
    # finish_plot(). matplotlib is imported lazily so that the text-only
    # mode works without it.
    def draw_plot(self):
        from matplotlib import pyplot as plt

        # Materialize the dict views so the plotting call receives plain
        # sequences.
        bucket_labels = list(self.variables_coverage_map.keys())
        bucket_counts = list(self.variables_coverage_map.values())

        buckets = range(len(bucket_labels))
        plt.figure(figsize=(12, 8))
        init_plot(plt)
        plt.bar(
            buckets,
            bucket_counts,
            align="center",
            tick_label=bucket_labels,
            label="variables of {}".format(self.file_name),
        )

        # Place the text box with the coverage info.
        pc_ranges_covered = self.get_pc_coverage()
        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.90,
            "PC ranges covered: {}%".format(pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)

    # Compare the two LocationStats objects and draw a plot showing
    # the difference: both data sets are drawn as bars around the same
    # bucket ticks, with one coverage text box per file.
    def draw_location_diff(self, locstats_to_compare):
        from matplotlib import pyplot as plt

        pc_ranges_covered = self.get_pc_coverage()
        pc_ranges_covered_to_compare = locstats_to_compare.get_pc_coverage()

        own_counts = list(self.variables_coverage_map.values())
        other_counts = list(locstats_to_compare.variables_coverage_map.values())

        buckets = range(len(own_counts))
        buckets_to_compare = range(len(other_counts))

        fig = plt.figure(figsize=(12, 8))
        ax = fig.add_subplot(111)
        init_plot(plt)

        comparison_keys = list(coverage_buckets())
        # The two series use align="edge" with widths of opposite sign so
        # that they end up on opposite sides of each tick.
        ax.bar(
            buckets,
            own_counts,
            align="edge",
            width=0.4,
            label="variables of {}".format(self.file_name),
        )
        ax.bar(
            buckets_to_compare,
            other_counts,
            color="r",
            align="edge",
            width=-0.4,
            label="variables of {}".format(locstats_to_compare.file_name),
        )
        ax.set_xticks(range(len(comparison_keys)))
        ax.set_xticklabels(comparison_keys)

        props = dict(boxstyle="round", facecolor="wheat", alpha=0.5)
        plt.text(
            0.02,
            0.88,
            "{} PC ranges covered: {}%".format(self.file_name, pc_ranges_covered),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )
        plt.text(
            0.02,
            0.83,
            "{} PC ranges covered: {}%".format(
                locstats_to_compare.file_name, pc_ranges_covered_to_compare
            ),
            transform=plt.gca().transAxes,
            fontsize=12,
            verticalalignment="top",
            bbox=props,
        )

        finish_plot(plt)
225
226
# Generate the labels of the location coverage buckets, in ascending order:
# exactly 0%, the open interval up to 10%, nine half-open 10%-wide
# intervals, and exactly 100%.
def coverage_buckets():
    yield "0%"
    yield "(0%,10%)"
    lower_bound = 10
    while lower_bound < 100:
        yield "[{0}%,{1}%)".format(lower_bound, lower_bound + 10)
        lower_bound += 10
    yield "100%"
234
235
# Run llvm-dwarfdump --statistics on "binary", parse the JSON it prints and
# create a LocationStats object from the fields selected by "opts".
#
# "opts" must provide the only_variables, only_formal_parameters and
# ignore_debug_entry_values flags. The process exits with status 1 if
# llvm-dwarfdump cannot be started, if its output is not valid JSON, or if
# an expected statistics field is missing. Fields reported as "overflowed"
# are stored as TAINT_VALUE after printing a warning.
def parse_locstats(opts, binary):
    # llvm-dwarfdump is looked up in the directory containing this script.
    llvm_dwarfdump_cmd = os.path.join(os.path.dirname(__file__), "llvm-dwarfdump")
    # The statistics llvm-dwarfdump option.
    llvm_dwarfdump_stats_opt = "--statistics"

    # Generate the stats with the llvm-dwarfdump.
    try:
        subproc = Popen(
            [llvm_dwarfdump_cmd, llvm_dwarfdump_stats_opt, binary],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
            universal_newlines=True,
        )
    except OSError as err:
        print("error: Unable to run llvm-dwarfdump: " + str(err))
        sys.exit(1)
    cmd_stdout, cmd_stderr = subproc.communicate()

    # TODO: Handle errors that are coming from llvm-dwarfdump.

    # Get the JSON and parse it. Only decoding failures are caught here, so
    # that SystemExit and KeyboardInterrupt are not swallowed.
    try:
        json_parsed = loads(cmd_stdout)
    except ValueError:
        print("error: No valid llvm-dwarfdump statistics found.")
        sys.exit(1)

    # TODO: Parse the statistics Version from JSON.

    # Read one statistics field, mapping "overflowed" to TAINT_VALUE.
    def init_field(name):
        try:
            value = json_parsed[name]
        except (KeyError, TypeError):
            print(
                'error: "' + name + '" field not found in llvm-dwarfdump statistics.'
            )
            sys.exit(1)
        if value == "overflowed":
            print('warning: "' + name + '" field overflowed.')
            return TAINT_VALUE
        return value

    # These stay None unless all the variables are being processed.
    variables_total = None
    variables_with_loc = None

    # The JSON keys of the three modes differ only in the prefix of the
    # counters and in the name of the sum_all_*() aggregates.
    if opts.only_variables:
        # Read the JSON only for local variables.
        counter_prefix = "#local vars"
        sum_prefix = "sum_all_local_vars"
    elif opts.only_formal_parameters:
        # Read the JSON only for formal parameters.
        counter_prefix = "#params"
        sum_prefix = "sum_all_params"
    else:
        # Read the JSON for both local variables and formal parameters.
        counter_prefix = "#variables"
        sum_prefix = "sum_all_variables"
        variables_total = init_field("#source variables")
        variables_with_loc = init_field("#source variables with location")

    variables_total_locstats = init_field(
        counter_prefix + " processed by location statistics"
    )
    variables_scope_bytes_covered = init_field(
        sum_prefix + "(#bytes in parent scope covered by DW_AT_location)"
    )
    variables_scope_bytes = init_field(sum_prefix + "(#bytes in parent scope)")

    if opts.ignore_debug_entry_values:
        # Discount the bytes that are covered only through entry values and
        # use the buckets that were computed without them.
        variables_scope_bytes_entry_values = init_field(
            sum_prefix + "(#bytes in parent scope covered by DW_OP_entry_value)"
        )
        if (
            variables_scope_bytes_covered != TAINT_VALUE
            and variables_scope_bytes_entry_values != TAINT_VALUE
        ):
            variables_scope_bytes_covered = (
                variables_scope_bytes_covered - variables_scope_bytes_entry_values
            )
        bucket_key_format = (
            counter_prefix
            + " - entry values with {} of parent scope covered by DW_AT_location"
        )
    else:
        bucket_key_format = (
            counter_prefix + " with {} of parent scope covered by DW_AT_location"
        )

    # An ordered mapping keeps the buckets in ascending order for reporting.
    variables_coverage_map = OrderedDict()
    for cov_bucket in coverage_buckets():
        variables_coverage_map[cov_bucket] = init_field(
            bucket_key_format.format(cov_bucket)
        )

    return LocationStats(
        binary,
        variables_total,
        variables_total_locstats,
        variables_with_loc,
        variables_scope_bytes_covered,
        variables_scope_bytes,
        variables_coverage_map,
    )
396
397
# Register the command line options of the tool on "parser" and return the
# namespace produced by parsing the program arguments.
def parse_program_args(parser):
    # All the switches are plain booleans that default to False; they are
    # registered in the order in which they appear in the help output.
    boolean_switches = (
        (
            "--only-variables",
            "calculate the location statistics only for local variables",
        ),
        (
            "--only-formal-parameters",
            "calculate the location statistics only for formal parameters",
        ),
        (
            "--ignore-debug-entry-values",
            "ignore the location statistics on locations with " "entry values",
        ),
        (
            "--draw-plot",
            "show histogram of location buckets generated (requires " "matplotlib)",
        ),
        (
            "--compare",
            "compare the debug location coverage on two files provided, "
            "and draw a plot showing the difference  (requires "
            "matplotlib)",
        ),
    )
    for switch, description in boolean_switches:
        parser.add_argument(
            switch, action="store_true", default=False, help=description
        )

    # One or more input files; the exact count is validated separately.
    parser.add_argument("file_names", nargs="+", type=str, help="file to process")

    parsed = parser.parse_args()
    return parsed
435
436
# Check that the parsed options form a valid invocation. Prints an error
# message and returns False on the first violated requirement; returns True
# when everything is acceptable.
def verify_program_inputs(opts):
    if len(sys.argv) < 2:
        complaint = "error: Too few arguments."
    elif opts.only_variables and opts.only_formal_parameters:
        complaint = "error: Please use just one --only* option."
    elif not opts.compare and len(opts.file_names) != 1:
        complaint = "error: Please specify only one file to process."
    elif opts.compare and len(opts.file_names) != 2:
        complaint = "error: Please specify two files to process."
    else:
        complaint = None

    if complaint is not None:
        print(complaint)
        return False

    # Both plotting modes need matplotlib; probe for it up front.
    needs_matplotlib = opts.draw_plot or opts.compare
    if needs_matplotlib:
        try:
            import matplotlib
        except ImportError:
            print("error: matplotlib not found.")
            return False

    return True
463
464
# Entry point of the tool: parse and validate the command line, collect the
# statistics of the first file, then either compare it against a second
# file, plot it, or print it.
def Main():
    arg_parser = argparse.ArgumentParser()
    opts = parse_program_args(arg_parser)

    if not verify_program_inputs(opts):
        arg_parser.print_help()
        sys.exit(1)

    locstats = parse_locstats(opts, opts.file_names[0])

    if opts.compare:
        # Draw a plot showing the difference in debug location coverage
        # between two files.
        other_locstats = parse_locstats(opts, opts.file_names[1])
        locstats.draw_location_diff(other_locstats)
        return

    if opts.draw_plot:
        # Draw a histogram representing the location buckets.
        locstats.draw_plot()
        return

    # Pretty print collected info on the standard output; a -1 result still
    # ends the program with a zero exit status.
    if locstats.pretty_print() == -1:
        sys.exit(0)
490
491
# Run the tool only when executed as a script. Main() exits on its own for
# the failure paths, so reaching the explicit exit below means success.
if __name__ == "__main__":
    Main()
    sys.exit(0)
495