xref: /openbsd-src/gnu/llvm/clang/tools/scan-build-py/lib/libscanbuild/report.py (revision 12c855180aad702bbcca06e0398d774beeafb155)
# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for generating 'index.html' for the report.

The input for this step is the output directory, where the individual reports
can be found. It parses those reports and generates 'index.html'. """
9a9ac8606Spatrick
10a9ac8606Spatrickimport re
11a9ac8606Spatrickimport os
12a9ac8606Spatrickimport os.path
13a9ac8606Spatrickimport sys
14a9ac8606Spatrickimport shutil
15a9ac8606Spatrickimport plistlib
16a9ac8606Spatrickimport glob
17a9ac8606Spatrickimport json
18a9ac8606Spatrickimport logging
19a9ac8606Spatrickimport datetime
20a9ac8606Spatrickfrom libscanbuild import duplicate_check
21a9ac8606Spatrickfrom libscanbuild.clang import get_version
22a9ac8606Spatrick
# Public API of this module: only the report entry point is exported.
__all__ = ['document']
24a9ac8606Spatrick
25a9ac8606Spatrick
def document(args):
    """ Generates cover report and returns the number of bugs/crashes.

    :param args: parsed command line arguments. This function reads the
                 'output' (report directory), 'output_format' and 'cdb'
                 (compilation database path) attributes and hands the
                 object over to 'assemble_cover'.
    :return: number of crashes plus number of unique bugs. """

    html_reports_available = args.output_format in {'html', 'plist-html', 'sarif-html'}
    sarif_reports_available = args.output_format in {'sarif', 'sarif-html'}

    logging.debug('count crashes and bugs')
    crash_count = sum(1 for _ in read_crashes(args.output))
    bug_counter = create_counters()
    for bug in read_bugs(args.output, html_reports_available):
        bug_counter(bug)
    result = crash_count + bug_counter.total

    # the cover page is only written when there is something to show
    if html_reports_available and result:
        use_cdb = os.path.exists(args.cdb)

        logging.debug('generate index.html file')
        # common prefix for source files to have shorter path
        prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
        # assemble the cover from multiple fragments
        fragments = []
        try:
            if bug_counter.total:
                fragments.append(bug_summary(args.output, bug_counter))
                fragments.append(bug_report(args.output, prefix))
            if crash_count:
                fragments.append(crash_report(args.output, prefix))
            assemble_cover(args, prefix, fragments)
            # copy additional files to the report
            copy_resource_files(args.output)
            if use_cdb:
                shutil.copy(args.cdb, args.output)
        finally:
            # fragments are temporary files, drop them even on failure
            for fragment in fragments:
                os.remove(fragment)

    if sarif_reports_available:
        logging.debug('merging sarif files')
        merge_sarif_files(args.output)

    return result
67a9ac8606Spatrick
68a9ac8606Spatrick
def assemble_cover(args, prefix, fragments):
    """ Put together the fragments into a final report.

    Writes 'index.html' into 'args.output': a header with run information,
    then the content of each fragment file in order, then the closing tags.

    :param args: parsed command line arguments; 'output', 'html_title' and
                 'clang' are read. 'html_title' is filled in (mutated) when
                 it is None.
    :param prefix: directory shown as working directory; its base name is
                   used for the default title.
    :param fragments: list of file names, HTML fragments to embed. """

    import getpass
    import socket

    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + ' - analyzer results'

    with open(os.path.join(args.output, 'index.html'), 'w') as handle:
        indent = 0
        # NOTE(review): the title is embedded as is; whether callers rely on
        # passing markup in it can not be told from here -- confirm before
        # escaping it too.
        handle.write(reindent("""
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""", indent).format(html_title=args.html_title))
        handle.write(comment('SUMMARYENDHEAD'))
        # plain text values (paths, command line arguments) can contain
        # '<', '>' or '&', so those are escaped before embedding.
        handle.write(reindent("""
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""", indent).format(
            html_title=args.html_title,
            user_name=escape(getpass.getuser()),
            host_name=escape(socket.gethostname()),
            current_dir=escape(prefix),
            cmd_args=escape(' '.join(sys.argv)),
            clang_version=get_version(args.clang),
            date=datetime.datetime.today().strftime('%c')))
        for fragment in fragments:
            # copy the content of fragments
            with open(fragment, 'r') as input_handle:
                shutil.copyfileobj(input_handle, handle)
        handle.write(reindent("""
        |  </body>
        |</html>""", indent))
114a9ac8606Spatrick
115a9ac8606Spatrick
def bug_summary(output_dir, bug_counter):
    """ Bug summary is a HTML table to give a better overview of the bugs.

    :param output_dir: directory where the fragment file is created.
    :param bug_counter: counter made by 'create_counters'; its 'total' and
                        'categories' attributes are read.
    :return: the name of the fragment file written. """

    name = os.path.join(output_dir, 'summary.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        handle.write(reindent("""
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""", indent).format(bug_counter.total))
        for category, types in bug_counter.categories.items():
            # the counters hold the raw category/type text, so it has to be
            # escaped here (the report table escapes the same values).
            handle.write(reindent("""
        |    <tr>
        |      <th>{0}</th><th colspan=2></th>
        |    </tr>""", indent).format(escape(category)))
            for entry in types.values():
                # escape on a copy, the counter itself stays untouched
                safe_entry = dict(entry, bug_type=escape(entry['bug_type']))
                handle.write(reindent("""
        |    <tr>
        |      <td class="SUMM_DESC">{bug_type}</td>
        |      <td class="Q">{bug_count}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox"
        |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
        |        </center>
        |      </td>
        |    </tr>""", indent).format(**safe_entry))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('SUMMARYBUGEND'))
    return name
166a9ac8606Spatrick
167a9ac8606Spatrick
def bug_report(output_dir, prefix):
    """ Creates a fragment from the analyzer reports.

    :param output_dir: directory of the HTML reports; the fragment file is
                       created here too.
    :param prefix: path prefix to cut from the source file names.
    :return: the name of the fragment file written. """

    pretty = prettify_bug(prefix, output_dir)
    # lazy sequence: reports are parsed while the table is being written
    bugs = (pretty(bug) for bug in read_bugs(output_dir, True))

    name = os.path.join(output_dir, 'bugs.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind">&nbsp;&#x25BE;</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        handle.write(comment('REPORTBUGCOL'))
        for current in bugs:
            handle.write(reindent("""
        |    <tr class="{bug_type_class}">
        |      <td class="DESC">{bug_category}</td>
        |      <td class="DESC">{bug_type}</td>
        |      <td>{bug_file}</td>
        |      <td class="DESC">{bug_function}</td>
        |      <td class="Q">{bug_line}</td>
        |      <td class="Q">{bug_path_length}</td>
        |      <td><a href="{report_file}#EndPath">View Report</a></td>
        |    </tr>""", indent).format(**current))
            handle.write(comment('REPORTBUG', {'id': current['report_file']}))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('REPORTBUGEND'))
    return name
213a9ac8606Spatrick
214a9ac8606Spatrick
def crash_report(output_dir, prefix):
    """ Creates a fragment from the compiler crashes.

    :param output_dir: report directory; crash files are looked up below it
                       and the fragment file is created in it.
    :param prefix: path prefix to cut from the source file names.
    :return: the name of the fragment file written. """

    pretty = prettify_crash(prefix, output_dir)
    # lazy sequence: crash files are parsed while the table is being written
    crashes = (pretty(crash) for crash in read_crashes(output_dir))

    name = os.path.join(output_dir, 'crashes.html.fragment')
    with open(name, 'w') as handle:
        indent = 4
        handle.write(reindent("""
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""", indent))
        for current in crashes:
            handle.write(reindent("""
        |    <tr>
        |      <td>{problem}</td>
        |      <td>{source}</td>
        |      <td><a href="{file}">preprocessor output</a></td>
        |      <td><a href="{stderr}">analyzer std err</a></td>
        |    </tr>""", indent).format(**current))
            # every field of the crash is recorded as comment attribute
            handle.write(comment('REPORTPROBLEM', current))
        handle.write(reindent("""
        |  </tbody>
        |</table>""", indent))
        handle.write(comment('REPORTCRASHES'))
    return name
251a9ac8606Spatrick
252a9ac8606Spatrick
def read_crashes(output_dir):
    """ Generate a sequence of crashes from given output directory.

    Every '*.info.txt' file in the 'failures' sub-directory is parsed
    lazily, as the returned generator is consumed. """

    pattern = os.path.join(output_dir, 'failures', '*.info.txt')
    return (parse_crash(filename) for filename in glob.iglob(pattern))
259a9ac8606Spatrick
260a9ac8606Spatrick
def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """ Generate a unique sequence of bugs from given output directory.

    Duplicates can be in a project if the same module was compiled multiple
    times with different compiler options. These would be better to show in
    the final report (cover) only once.

    :param output_dir: directory of the individual report files.
    :param html: when true the '*.html' reports are parsed, otherwise the
                 '*.plist' ones. """

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    # two bugs are the same if line, path length and file are all equal
    duplicate = duplicate_check(
        lambda bug: '{bug_line}.{bug_path_length}:{bug_file}'.format(**bug))

    # get the right parser for the job.
    parser = parse_bug_html if html else parse_bug_plist
    # get the input files, which are not empty.
    pattern = os.path.join(output_dir, '*.html' if html else '*.plist')
    report_files = (name for name in glob.iglob(pattern) if not empty(name))

    for report_file in report_files:
        for bug in parser(report_file):
            if not duplicate(bug):
                yield bug
285a9ac8606Spatrick
def merge_sarif_files(output_dir, sort_files=False):
    """ Reads and merges all .sarif files in the given output directory.

    Each sarif file in the output directory is understood as a single run
    and thus appear separate in the top level runs array. This requires
    modifying the run index of any embedded links in messages.

    The result is written to 'results-merged.sarif' in the same directory.
    A merged file left there by a previous call is not taken as input, so
    calling this twice does not duplicate the runs.

    :param output_dir: directory of the '*.sarif' files.
    :param sort_files: merge the files in sorted name order. Exposed for
                       testing since the order of files returned by glob
                       is not guaranteed to be sorted.
    """

    merged_name = 'results-merged.sarif'
    # we only merge runs, so we only need to update the run index
    run_link = re.compile(r'(sarif:/runs/)(\d+)')

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    def match_and_update_run(message, runs_count_offset):
        """
            Given a SARIF message object, checks if the text property contains an embedded link and
            updates the run index if necessary.
        """
        if not isinstance(message, dict) or 'text' not in message:
            return message

        # every 'sarif:/runs/<N>' occurrence gets its index shifted
        message['text'] = run_link.sub(
            lambda match: match.group(1) + str(runs_count_offset + int(match.group(2))),
            message['text'])
        return message

    def update_sarif_object(sarif_object, runs_count_offset):
        """
            Given a SARIF object, checks its dictionary entries for a 'message' property.
            If it exists, updates the run index of embedded links in the message.

            Recursively looks through entries in the dictionary.
        """
        if not isinstance(sarif_object, dict):
            return sarif_object

        if 'message' in sarif_object:
            sarif_object['message'] = match_and_update_run(sarif_object['message'], runs_count_offset)

        for key in sarif_object:
            if isinstance(sarif_object[key], list):
                # iterate through subobjects and update it.
                sarif_object[key] = [update_sarif_object(entry, runs_count_offset)
                                     for entry in sarif_object[key]]
            elif isinstance(sarif_object[key], dict):
                sarif_object[key] = update_sarif_object(sarif_object[key], runs_count_offset)

        return sarif_object

    sarif_files = (name for name in glob.iglob(os.path.join(output_dir, '*.sarif'))
                   if os.path.basename(name) != merged_name and not empty(name))
    if sort_files:
        sarif_files = sorted(sarif_files)

    runs_count = 0
    merged = {}
    for sarif_file in sarif_files:
        # JSON text is UTF-8, do not depend on the locale encoding
        with open(sarif_file, encoding='utf-8') as fp:
            sarif = json.load(fp)
        if 'runs' not in sarif:
            continue

        # start with the first file
        if not merged:
            merged = sarif
        else:
            # extract the run and append it to the merged output
            for run in sarif['runs']:
                merged['runs'].append(update_sarif_object(run, runs_count))

        runs_count += len(sarif['runs'])

    with open(os.path.join(output_dir, merged_name), 'w') as out:
        json.dump(merged, out, indent=4, sort_keys=True)
377a9ac8606Spatrick
378a9ac8606Spatrick
def parse_bug_plist(filename):
    """ Returns the generator of bugs from a single .plist file.

    A diagnostic whose file index does not point into the 'files' list of
    the document (including when that list is missing) is skipped with a
    warning.

    :param filename: the plist file to read.
    :return: generator of dictionaries with the 'result', 'bug_type',
             'bug_category', 'bug_line', 'bug_path_length' and 'bug_file'
             keys. """

    with open(filename, 'rb') as fp:
        content = plistlib.load(fp)
        # 'files' might be absent, treat it as no file at all
        files = content.get('files') or []
        for bug in content.get('diagnostics', []):
            file_index = int(bug['location']['file'])
            # a negative index would silently pick a file from the end
            if not 0 <= file_index < len(files):
                logging.warning('Parsing bug from "%s" failed', filename)
                continue

            yield {
                'result': filename,
                'bug_type': bug['type'],
                'bug_category': bug['category'],
                'bug_line': int(bug['location']['line']),
                # NOTE(review): the column is stored as path length here;
                # looks intentional for duplicate detection -- confirm.
                'bug_path_length': int(bug['location']['col']),
                'bug_file': files[file_index]
            }
398a9ac8606Spatrick
399a9ac8606Spatrick
def parse_bug_html(filename):
    """ Parse out the bug information from HTML output.

    The bug attributes are read from '<!-- KEY value -->' comment lines of
    the report. Reading stops at the '<!-- BUGMETAEND -->' line, the rest
    of the report is never loaded.

    :param filename: the HTML report to read.
    :return: generator of a single bug dictionary. """

    patterns = [re.compile(r'<!-- BUGTYPE (?P<bug_type>.*) -->$'),
                re.compile(r'<!-- BUGFILE (?P<bug_file>.*) -->$'),
                re.compile(r'<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$'),
                re.compile(r'<!-- BUGLINE (?P<bug_line>.*) -->$'),
                re.compile(r'<!-- BUGCATEGORY (?P<bug_category>.*) -->$'),
                re.compile(r'<!-- BUGDESC (?P<bug_description>.*) -->$'),
                re.compile(r'<!-- FUNCTIONNAME (?P<bug_function>.*) -->$')]
    endsign = re.compile(r'<!-- BUGMETAEND -->')

    # defaults for the attributes a report might not have
    bug = {
        'report_file': filename,
        'bug_function': 'n/a',  # compatibility with < clang-3.5
        'bug_category': 'Other',
        'bug_line': 0,
        'bug_path_length': 1
    }

    with open(filename, encoding='utf-8') as handler:
        # iterate lazily: only the lines up to the end sign are read
        for raw_line in handler:
            line = raw_line.strip()
            # do not read the file further
            if endsign.match(line):
                break
            # search for the right lines
            for regex in patterns:
                match = regex.match(line)
                if match:
                    bug.update(match.groupdict())
                    break

    # values captured from the report are text, these two are numbers
    encode_value(bug, 'bug_line', int)
    encode_value(bug, 'bug_path_length', int)

    yield bug
436a9ac8606Spatrick
437a9ac8606Spatrick
def parse_crash(filename):
    """ Parse out the crash information from the report file.

    The first line of the file is taken as the source file name, the second
    as the problem description. A missing line results in an empty value,
    bytes which are not valid UTF-8 are replaced.

    :param filename: the '<name>.info.txt' file to read.
    :return: dictionary with the 'source', 'problem', 'file', 'info' and
             'stderr' keys; the last three are file names derived from
             'filename'. """

    match = re.match(r'(.*)\.info\.txt', filename)
    # fall back to the file name itself when the suffix is not there
    name = match.group(1) if match else filename
    with open(filename, mode='rb') as handler:
        # this is a workaround to fix windows read '\r\n' as new lines.
        lines = [line.decode(errors='replace').rstrip() for line in handler]
    return {
        'source': lines[0] if lines else '',
        'problem': lines[1] if len(lines) > 1 else '',
        'file': name,
        'info': filename,
        'stderr': name + '.stderr.txt'
    }
453a9ac8606Spatrick
454a9ac8606Spatrick
def category_type_name(bug):
    """ Create a new bug attribute from bug by category and type.

    The result will be used as CSS class selector in the final report.

    :param bug: dictionary; 'bug_category' and 'bug_type' are read, a
                missing one counts as empty text.
    :return: 'bt_<category>_<type>' in escaped form. """

    def smash(key):
        """ Make value ready to be HTML attribute value. """

        # lower case, spaces to underscores, apostrophes dropped
        return bug.get(key, '').lower().replace(' ', '_').replace("'", '')

    return escape('bt_' + smash('bug_category') + '_' + smash('bug_type'))
466a9ac8606Spatrick
467a9ac8606Spatrick
def create_counters():
    """ Create counters for bug statistics.

    Two entries are maintained: 'total' is an integer, represents the
    number of bugs. The 'categories' is a two level categorisation of bug
    counters. The first level is 'bug category' the second is 'bug type'.
    Each entry in this classification is a dictionary of 'bug_type',
    'bug_type_class' and 'bug_count'.

    :return: a function to call with each bug; the statistics are
             attributes ('total', 'categories') of that function. """

    def predicate(bug):
        bug_category = bug['bug_category']
        bug_type = bug['bug_type']
        # entries are created on the first bug of the category/type
        current_category = predicate.categories.setdefault(bug_category, dict())
        current_type = current_category.setdefault(bug_type, {
            'bug_type': bug_type,
            'bug_type_class': category_type_name(bug),
            'bug_count': 0
        })
        current_type['bug_count'] += 1
        predicate.total += 1

    predicate.total = 0
    predicate.categories = dict()
    return predicate
494a9ac8606Spatrick
495a9ac8606Spatrick
def prettify_bug(prefix, output_dir):
    """ Create a function which makes a bug ready to embed into HTML.

    :param prefix: path prefix to cut from the source file name.
    :param output_dir: path prefix to cut from the report file name.
    :return: function which updates the given bug in place and returns
             it. """

    def predicate(bug):
        """ Make safe this values to embed into HTML. """

        # computed first, from the not yet escaped category and type
        bug['bug_type_class'] = category_type_name(bug)

        encode_value(bug, 'bug_file', lambda x: escape(chop(prefix, x)))
        encode_value(bug, 'bug_category', escape)
        encode_value(bug, 'bug_type', escape)
        # function names can have '<', '>' or '&' in it (C++ templates and
        # operators), and the value goes into the report table.
        encode_value(bug, 'bug_function', escape)
        encode_value(bug, 'report_file', lambda x: escape(chop(output_dir, x)))
        return bug

    return predicate
509a9ac8606Spatrick
510a9ac8606Spatrick
def prettify_crash(prefix, output_dir):
    """ Create a function which makes a crash ready to embed into HTML.

    :param prefix: path prefix to cut from the source file name.
    :param output_dir: path prefix to cut from the crash file names.
    :return: function which updates the given crash in place and returns
             it. """

    def predicate(crash):
        """ Make safe this values to embed into HTML. """

        encode_value(crash, 'source', lambda x: escape(chop(prefix, x)))
        encode_value(crash, 'problem', escape)
        # these three are file names below the output directory
        for key in ('file', 'info', 'stderr'):
            encode_value(crash, key, lambda x: escape(chop(output_dir, x)))
        return crash

    return predicate
523a9ac8606Spatrick
524a9ac8606Spatrick
def copy_resource_files(output_dir):
    """ Copy the javascript and css files to the report directory.

    Every entry of the 'resources' directory, which is next to this module,
    is copied into 'output_dir'. """

    this_dir = os.path.dirname(os.path.realpath(__file__))
    resource_dir = os.path.join(this_dir, 'resources')
    for resource in os.listdir(resource_dir):
        shutil.copy(os.path.join(resource_dir, resource), output_dir)
531a9ac8606Spatrick
532a9ac8606Spatrick
def encode_value(container, key, encode):
    """ Run 'encode' on 'container[key]' value and update it.

    The container is changed in place. Nothing happens (and 'encode' is
    not called) when the key is not in the container. """

    if key in container:
        container[key] = encode(container[key])
539a9ac8606Spatrick
540a9ac8606Spatrick
def chop(prefix, filename):
    """ Create 'filename' from '/prefix/filename'

    :param prefix: directory the result shall be relative to; when empty
                   the file name is returned as is.
    :param filename: the path to shorten.
    :return: 'filename' relative to 'prefix', or 'filename' unchanged when
             no relative path can be made. """

    if not len(prefix):
        return filename
    try:
        return os.path.relpath(filename, prefix)
    except ValueError:
        # relpath refuses an empty path, and on Windows paths on different
        # drives; the full name is still good enough for the report.
        return filename
545a9ac8606Spatrick
546a9ac8606Spatrick
def escape(text):
    """ Paranoid HTML escape method. (Python version independent)

    Replaces the '&', '"', "'", '>' and '<' characters with the matching
    character entity, every other character is kept. """

    escape_table = {
        '&': '&amp;',
        '"': '&quot;',
        "'": '&apos;',
        '>': '&gt;',
        '<': '&lt;'
    }
    # character by character, so an '&' of an entity is never escaped twice
    return ''.join(escape_table.get(c, c) for c in text)
558a9ac8606Spatrick
559a9ac8606Spatrick
def reindent(text, indent):
    """ Utility function to format html output and keep indentation.

    Every non blank line of 'text' shall have a '|' margin marker. The
    output line is 'indent' spaces, then everything after the first marker
    of the line (so the content itself can have '|' in it), then the
    platform line separator. Blank lines are dropped.

    :raises IndexError: when a non blank line has no margin marker. """

    return ''.join(
        ' ' * indent + line.split('|', 1)[1] + os.linesep
        for line in text.splitlines() if line.strip())
568a9ac8606Spatrick
569a9ac8606Spatrick
def comment(name, opts=None):
    """ Utility function to format meta information as comment.

    :param name: the name of the comment.
    :param opts: optional dictionary, written as 'key="value"' attributes
                 in iteration order. The values are not escaped here.
    :return: a single HTML comment line with the platform line
             separator. """

    # None (and not a dictionary) is the default, to not share a mutable
    # object between the calls
    attributes = ''.join(
        ' {0}="{1}"'.format(key, value) for key, value in (opts or {}).items())

    return '<!-- {0}{1} -->{2}'.format(name, attributes, os.linesep)
578a9ac8606Spatrick
579a9ac8606Spatrick
def commonprefix_from(filename):
    """ Create file prefix from a compilation database entries.

    :param filename: the compilation database, a JSON file with a list of
                     entries which all have a 'file' key.
    :return: the common path prefix of the 'file' values. """

    with open(filename, 'r') as handle:
        entries = json.load(handle)
    return commonprefix(entry['file'] for entry in entries)
585a9ac8606Spatrick
586a9ac8606Spatrick
def commonprefix(files):
    """ Fixed version of os.path.commonprefix.

    The prefix is computed on whole path components, so for 'lib/a.c' and
    'lib64/b.c' the result is the parent of those directories and never
    'lib'.

    :param files: list (or any iterable) of file names.
    :return: the longest path prefix that is a prefix of all files. It is
             an empty string when there is no file or no common prefix.
             An existing directory is returned as absolute path. """

    paths = list(files)
    if not paths:
        return ''

    try:
        result = os.path.commonpath(paths)
        # true when the prefix is one of the inputs (eg.: single file)
        is_input = any(os.path.commonpath([path]) == result for path in paths)
    except ValueError:
        # absolute and relative names are mixed, or different drives
        return ''

    if os.path.isdir(result):
        return os.path.abspath(result)
    elif is_input:
        # the prefix is a file name, take the directory of it
        return os.path.dirname(result)
    else:
        return result
605