xref: /llvm-project/llvm/utils/llvm-original-di-preservation.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
1#!/usr/bin/env python
2#
3# Debugify summary for the original debug info testing.
4#
5
6from __future__ import print_function
7import argparse
8import os
9import sys
10from json import loads
11from collections import defaultdict
12from collections import OrderedDict
13
14
class DILocBug:
    """One debug-location bug: what happened to which instruction, and where."""

    def __init__(self, action, bb_name, fn_name, instr):
        # Store the reported fields unchanged.
        self.action, self.bb_name = action, bb_name
        self.fn_name, self.instr = fn_name, instr

    def __str__(self):
        # The fields are glued together with no separator, in this order.
        fields = (self.action, self.bb_name, self.fn_name, self.instr)
        return "".join(fields)
24
25
class DISPBug:
    """One subprogram bug: the action taken and the function it concerns."""

    def __init__(self, action, fn_name):
        # Store the reported fields unchanged.
        self.action, self.fn_name = action, fn_name

    def __str__(self):
        # Action immediately followed by the function name, no separator.
        return "".join((self.action, self.fn_name))
33
34
class DIVarBug:
    """One variable bug: the action, the variable name and its function."""

    def __init__(self, action, name, fn_name):
        # Store the reported fields unchanged.
        self.action = action
        self.name, self.fn_name = name, fn_name

    def __str__(self):
        # Action, variable name and function name with no separator between.
        pieces = [self.action, self.name, self.fn_name]
        return "".join(pieces)
43
44
# Report the bugs in form of html.
def generate_html_report(
    di_location_bugs,
    di_subprogram_bugs,
    di_var_bugs,
    di_location_bugs_summary,
    di_sp_bugs_summary,
    di_var_bugs_summary,
    html_file,
):
    """Write the collected bugs to *html_file* as six HTML tables.

    For each bug kind (location, subprogram, variable) the page holds a
    detailed table with one row per bug, followed by a summary table with
    one row per pass.

    Args:
        di_location_bugs: mapping file -> pass -> iterable of objects with
            ``instr``, ``fn_name``, ``bb_name`` and ``action`` attributes.
        di_subprogram_bugs: mapping file -> pass -> iterable of objects with
            ``fn_name`` and ``action`` attributes.
        di_var_bugs: mapping file -> pass -> iterable of objects with
            ``name``, ``fn_name`` and ``action`` attributes.
        di_location_bugs_summary: mapping pass -> number of location bugs.
        di_sp_bugs_summary: mapping pass -> number of subprogram bugs.
        di_var_bugs_summary: mapping pass -> number of variable bugs.
        html_file: path of the report to create (overwritten if present).

    Every cell value is HTML-escaped, so text such as ``<4 x i32>`` or
    ``PassManager<Function>`` is shown literally.  A table without data rows
    gets a single "No bugs found" row spanning all of its columns; for a
    summary table this depends on the summary mapping itself being empty.
    """
    html_header = """ <html>
  <head>
  <style>
  table, th, td {
    border: 1px solid black;
  }
  table.center {
    margin-left: auto;
    margin-right: auto;
  }
  </style>
  </head>
  <body>
  """

    html_footer = """</body>
  </html>"""

    new_line = "<br>\n"

    summary_header = ["LLVM Pass Name", "Number of bugs"]

    # One entry per bug kind: detailed table title, its column names, the
    # bugs, the bug attributes that fill the columns after "File" and
    # "LLVM Pass Name", then the summary title and the summary counts.
    sections = [
        (
            "Location Bugs found by the Debugify",
            [
                "File",
                "LLVM Pass Name",
                "LLVM IR Instruction",
                "Function Name",
                "Basic Block Name",
                "Action",
            ],
            di_location_bugs,
            ("instr", "fn_name", "bb_name", "action"),
            "Summary of Location Bugs",
            di_location_bugs_summary,
        ),
        (
            "SP Bugs found by the Debugify",
            ["File", "LLVM Pass Name", "Function Name", "Action"],
            di_subprogram_bugs,
            ("fn_name", "action"),
            "Summary of SP Bugs",
            di_sp_bugs_summary,
        ),
        (
            "Variable Location Bugs found by the Debugify",
            ["File", "LLVM Pass Name", "Variable", "Function", "Action"],
            di_var_bugs,
            ("name", "fn_name", "action"),
            "Summary of Variable Location Bugs",
            di_var_bugs_summary,
        ),
    ]

    page = [html_header]
    for index, section in enumerate(sections):
        title, header, bugs, attr_names, sum_title, summary = section
        # Two line breaks separate one bug kind from the next.
        if index:
            page.append(new_line * 2)
        page.append(
            _render_html_table(title, header, _collect_bug_rows(bugs, attr_names))
        )
        page.append(new_line)
        # The summary is listed in sorted pass-name order.
        page.append(
            _render_html_table(sum_title, summary_header, sorted(summary.items()))
        )
    page.append(html_footer)

    # Build the whole page first so that a failure above does not leave a
    # truncated report behind; `with` closes the file even if writing fails.
    with open(html_file, "w") as fileout:
        fileout.write("".join(page))

    print("The " + html_file + " generated.")


# Flatten the file -> pass -> bugs mapping into one list of cell values per bug.
def _collect_bug_rows(bugs_by_file, attr_names):
    rows = []
    for file_name, per_file_bugs in bugs_by_file.items():
        for llvm_pass, per_pass_bugs in per_file_bugs.items():
            # The same file-pass pair can have multiple bugs (or none).
            for bug in per_pass_bugs:
                rows.append(
                    [file_name, llvm_pass]
                    + [getattr(bug, attr) for attr in attr_names]
                )
    return rows


# Render one captioned html table; without rows it says "No bugs found".
def _render_html_table(title, header, rows):
    # Imported locally so this block does not depend on the file-level imports.
    from html import escape

    def cell(tag, value):
        # str() tolerates non-string values (e.g. the bug counts).
        return "    <{0}>{1}</{0}>\n".format(tag, escape(str(value).strip()))

    lines = [
        "<table>\n",
        "  <caption><b>{}</b></caption>\n".format(escape(title)),
        "  <tr>\n",
    ]
    lines.extend(cell("th", column) for column in header)
    lines.append("  </tr>\n")

    has_rows = False
    for row in rows:
        has_rows = True
        lines.append("  <tr>\n")
        lines.extend(cell("td", value) for value in row)
        lines.append("  </tr>\n")

    if not has_rows:
        lines.append(
            "  <tr>\n    <td colspan='{}'> No bugs found </td>\n  </tr>\n".format(
                len(header)
            )
        )
    lines.append("</table>\n")
    return "".join(lines)
365
366
# Read the JSON file in chunks.
def get_json_chunk(file, start, size):
    """Parse the lines of *file* numbered ``start`` to ``start + size - 1``.

    Lines are numbered from 1.  The file is read from its beginning on every
    call; lines before ``start`` are passed over without being parsed, and
    reading stops at line ``start + size``.

    Args:
        file: path of a text file holding one JSON object per line.
        start: number of the first line to parse.
        size: length of the line-number window.

    Returns:
        A tuple ``(objects, skipped_lines, line)``: the decoded JSON values,
        the number of lines in the window that were not valid JSON, and the
        number of the last line read (the total line count when the end of
        the file was reached, otherwise ``start + size``).
    """
    di_checker_data = []
    skipped_lines = 0
    line = 0

    # The file contains json object per line.
    # An example of the line (formatted json):
    # {
    #  "file": "simple.c",
    #  "pass": "Deduce function attributes in RPO",
    #  "bugs": [
    #    [
    #      {
    #        "action": "drop",
    #        "metadata": "DISubprogram",
    #        "name": "fn2"
    #      },
    #      {
    #        "action": "drop",
    #        "metadata": "DISubprogram",
    #        "name": "fn1"
    #      }
    #    ]
    #  ]
    # }
    with open(file) as json_objects_file:
        for json_object_line in json_objects_file:
            line += 1
            if line < start:
                continue
            if line >= start + size:
                break
            try:
                json_object = loads(json_object_line)
            except ValueError:
                # Malformed JSON (json.JSONDecodeError is a ValueError): count
                # the line and move on.  Anything else, e.g. Ctrl-C, propagates.
                skipped_lines += 1
            else:
                di_checker_data.append(json_object)

    return (di_checker_data, skipped_lines, line)
409
410
# Parse the program arguments.
def parse_program_args(parser):
    """Declare the tool's options on *parser* and return the parsed result.

    Two positional arguments are required, ``file_name`` and ``html_file``;
    the optional ``-compress`` flag is stored as a boolean.
    """
    positionals = (
        ("file_name", "json file to process"),
        ("html_file", "html file to output data"),
    )
    for arg_name, description in positionals:
        parser.add_argument(arg_name, type=str, help=description)

    parser.add_argument(
        "-compress", help="create reduced html report", action="store_true"
    )

    options = parser.parse_args()
    return options
420
421
def Main():
    """Turn the bug log named on the command line into an HTML report.

    The input file holds one JSON object per line with the keys "file",
    "pass" and "bugs"; it is read in chunks of one million lines.  Every bug
    is sorted by its "metadata" value into location, subprogram or variable
    bugs and counted in the per-pass summary of its kind.  Within one JSON
    line identical bugs are listed once; with ``-compress`` a bug is listed
    only if no earlier bug had the same pass and instruction / function /
    variable name.  Bugs from different lines about the same file-pass pair
    are accumulated.

    Lines and bugs that lack the expected fields are skipped, and their
    counts are printed after the report has been written.  Exits with
    status 1 when the output file name does not end in ".html".
    """
    parser = argparse.ArgumentParser()
    opts = parse_program_args(parser)

    if not opts.html_file.endswith(".html"):
        print("error: The output file must be '.html'.")
        sys.exit(1)

    # Use the defaultdict in order to make multidim dicts (file -> pass ->
    # list of bugs).  The innermost default is a list so that bugs coming
    # from several JSON lines about the same file-pass pair add up instead
    # of the last line replacing the earlier ones.
    di_location_bugs = defaultdict(lambda: defaultdict(list))
    di_subprogram_bugs = defaultdict(lambda: defaultdict(list))
    di_variable_bugs = defaultdict(lambda: defaultdict(list))

    # Use the ordered dict to make a summary.
    di_location_bugs_summary = OrderedDict()
    di_sp_bugs_summary = OrderedDict()
    di_var_bugs_summary = OrderedDict()

    # Compress similar bugs.
    # DILocBugs with same pass & instruction name.
    di_loc_pass_instr_set = set()
    # DISPBugs with same pass & function name.
    di_sp_pass_fn_set = set()
    # DIVarBugs with same pass & variable name.
    di_var_pass_var_set = set()

    # What a record of unexpected shape raises when indexed: a missing key
    # or index (LookupError) or a value of the wrong JSON type (TypeError).
    malformed = (LookupError, TypeError)

    start_line = 0
    chunk_size = 1000000
    end_line = chunk_size - 1
    skipped_lines = 0
    skipped_bugs = 0
    # Process each chunk of 1 million JSON lines.  get_json_chunk returns the
    # number of the last line it read; once the next chunk would start past
    # it, the file is exhausted.
    while start_line <= end_line:
        (debug_info_bugs, skipped, end_line) = get_json_chunk(
            opts.file_name, start_line, chunk_size
        )
        start_line += chunk_size
        skipped_lines += skipped

        # Map the bugs into the file-pass pairs.
        for bugs_per_pass in debug_info_bugs:
            try:
                bugs_file = bugs_per_pass["file"]
                bugs_pass = bugs_per_pass["pass"]
                bugs = bugs_per_pass["bugs"][0]
            except malformed:
                skipped_lines += 1
                continue

            di_loc_bugs = []
            di_sp_bugs = []
            di_var_bugs = []

            # Omit duplicated bugs.  The keys are tuples of the bug fields,
            # so different bugs can never collapse into the same key.
            di_loc_set = set()
            di_sp_set = set()
            di_var_set = set()
            for bug in bugs:
                try:
                    bugs_metadata = bug["metadata"]
                except malformed:
                    skipped_bugs += 1
                    continue

                if bugs_metadata == "DILocation":
                    try:
                        action = bug["action"]
                        bb_name = bug["bb-name"]
                        fn_name = bug["fn-name"]
                        instr = bug["instr"]
                    except malformed:
                        skipped_bugs += 1
                        continue
                    bug_key = (action, bb_name, fn_name, instr)
                    if bug_key not in di_loc_set:
                        di_loc_set.add(bug_key)
                        di_loc_bug = DILocBug(action, bb_name, fn_name, instr)
                        if opts.compress:
                            pass_instr = (bugs_pass, instr)
                            if pass_instr not in di_loc_pass_instr_set:
                                di_loc_pass_instr_set.add(pass_instr)
                                di_loc_bugs.append(di_loc_bug)
                        else:
                            di_loc_bugs.append(di_loc_bug)

                    # Fill the summary dict (duplicates are counted too).
                    di_location_bugs_summary[bugs_pass] = (
                        di_location_bugs_summary.get(bugs_pass, 0) + 1
                    )
                elif bugs_metadata == "DISubprogram":
                    try:
                        action = bug["action"]
                        name = bug["name"]
                    except malformed:
                        skipped_bugs += 1
                        continue
                    bug_key = (action, name)
                    if bug_key not in di_sp_set:
                        di_sp_set.add(bug_key)
                        di_sp_bug = DISPBug(action, name)
                        if opts.compress:
                            pass_fn = (bugs_pass, name)
                            if pass_fn not in di_sp_pass_fn_set:
                                di_sp_pass_fn_set.add(pass_fn)
                                di_sp_bugs.append(di_sp_bug)
                        else:
                            di_sp_bugs.append(di_sp_bug)

                    # Fill the summary dict (duplicates are counted too).
                    di_sp_bugs_summary[bugs_pass] = (
                        di_sp_bugs_summary.get(bugs_pass, 0) + 1
                    )
                elif bugs_metadata == "dbg-var-intrinsic":
                    try:
                        action = bug["action"]
                        fn_name = bug["fn-name"]
                        name = bug["name"]
                    except malformed:
                        skipped_bugs += 1
                        continue
                    bug_key = (action, name, fn_name)
                    if bug_key not in di_var_set:
                        di_var_set.add(bug_key)
                        di_var_bug = DIVarBug(action, name, fn_name)
                        if opts.compress:
                            pass_var = (bugs_pass, name)
                            if pass_var not in di_var_pass_var_set:
                                di_var_pass_var_set.add(pass_var)
                                di_var_bugs.append(di_var_bug)
                        else:
                            di_var_bugs.append(di_var_bug)

                    # Fill the summary dict (duplicates are counted too).
                    di_var_bugs_summary[bugs_pass] = (
                        di_var_bugs_summary.get(bugs_pass, 0) + 1
                    )
                else:
                    # Unsupported metadata.
                    skipped_bugs += 1
                    continue

            # Add this line's bugs to whatever earlier lines reported for the
            # same file-pass pair.
            di_location_bugs[bugs_file][bugs_pass].extend(di_loc_bugs)
            di_subprogram_bugs[bugs_file][bugs_pass].extend(di_sp_bugs)
            di_variable_bugs[bugs_file][bugs_pass].extend(di_var_bugs)

    generate_html_report(
        di_location_bugs,
        di_subprogram_bugs,
        di_variable_bugs,
        di_location_bugs_summary,
        di_sp_bugs_summary,
        di_var_bugs_summary,
        opts.html_file,
    )

    if skipped_lines > 0:
        print("Skipped lines: " + str(skipped_lines))
    if skipped_bugs > 0:
        print("Skipped bugs: " + str(skipped_bugs))
583
584
# Script entry point: build the report, then exit with status 0.
if __name__ == "__main__":
    Main()
    sys.exit(0)
588