xref: /netbsd-src/external/apache2/llvm/dist/llvm/utils/update_mir_test_checks.py (revision 42b9e898991e23b560315a9b1da6a36a39d4351b)
1#!/usr/bin/env python
2
3"""Updates FileCheck checks in MIR tests.
4
5This script is a utility to update MIR based tests with new FileCheck
6patterns.
7
8The checks added by this script will cover the entire body of each
9function it handles. Virtual registers used are given names via
10FileCheck patterns, so if you do want to check a subset of the body it
11should be straightforward to trim out the irrelevant parts. None of
12the YAML metadata will be checked, other than function names.
13
14If there are multiple llc commands in a test, the full set of checks
15will be repeated for each different check pattern. Checks for patterns
16that are common between different commands will be left as-is by
17default, or removed if the --remove-common-prefixes flag is provided.
18"""
19
20from __future__ import print_function
21
22import argparse
23import collections
24import glob
25import os
26import re
27import subprocess
28import sys
29
30from UpdateTestChecks import common
31
# Matches the "name:" key of a MIR function and captures the function name.
MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
# Matches the "body: |" key that introduces a MIR function body.
MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
# Matches a basic block label line such as "bb.0.entry:".
MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# Matches a virtual register such as "%0", "%0:gpr32" or "%0:_(s32)".
# Group 1 is the bare register ("%0") without the class/type suffixes.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# Zero or more instruction flags that may sit between "=" and the opcode.
MI_FLAGS_STR = (
    r'(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn '
    r'|reassoc |nuw |nsw |exact |fpexcept )*')
# Matches an instruction that defines one or more virtual registers. The
# comma-separated definitions are captured as "vregs" and the opcode as
# "opcode".
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) = '
    r'{1}(?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern, MI_FLAGS_STR))
# Matches the lines that are skipped over before checks are inserted into a
# single-block MIR body: comments, a basic block label, lowercase "key:"
# lines and blank lines. The dot after "bb" is escaped so that only a real
# "bb.<N>" label is accepted, in agreement with MIR_BASIC_BLOCK_RE.
MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb\.[0-9].*: *$|[a-z]+:( |$)|$)')

# Matches an LLVM IR "define" line and captures the function name.
IR_FUNC_NAME_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
# Matches the comment and blank lines that are skipped over before checks are
# inserted into an IR function.
IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')

# Matches one whole MIR YAML document, from the "---" line to the "..." line,
# capturing the function name as "func" and the text of the body block as
# "body".
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))
57
58
class LLC:
    """Callable wrapper around an llc binary.

    Calling an instance runs the binary through the shell with the given
    argument string, feeds it the test file on stdin and returns what it
    printed on stdout.
    """

    def __init__(self, bin):
        self.bin = bin

    def __call__(self, args, ir):
        # MIR inputs arrive on stdin, so llc must be told the input language.
        if ir.endswith('.mir'):
            args = '{} -x mir'.format(args)
        command = '{} {}'.format(self.bin, args)
        with open(ir) as ir_file:
            output = subprocess.check_output(command, shell=True,
                                             stdin=ir_file)
        # check_output returns bytes on Python 3.
        if sys.version_info[0] > 2:
            output = output.decode()
        # Normalize Windows line endings to unix LF style.
        return output.replace('\r\n', '\n')
74
75
class Run:
    """One RUN line: its check prefixes, its llc arguments and its triple.

    Integer indexing is supported so that an instance can be unpacked as
    ``prefixes, cmd_args, triple``.
    """

    def __init__(self, prefixes, cmd_args, triple):
        self.prefixes = prefixes
        self.cmd_args = cmd_args
        self.triple = triple

    def __getitem__(self, index):
        # Built on every access so that reassigned attributes are reflected.
        fields = [self.prefixes, self.cmd_args, self.triple]
        return fields[index]
84
85
def log(msg, verbose=True):
    """Print *msg* to stderr, or do nothing when *verbose* is false."""
    if not verbose:
        return
    print(msg, file=sys.stderr)
89
90
def find_triple_in_ir(lines, verbose=False):
    """Return group 1 of the first line matching common.TRIPLE_IR_RE.

    Returns None when no line matches.
    """
    matches = (common.TRIPLE_IR_RE.match(line) for line in lines)
    return next((m.group(1) for m in matches if m), None)
97
98
def find_run_lines(test, lines, verbose=False):
    """Collect the RUN commands from *lines*, joining continued ones.

    A RUN line ending in a backslash is merged with the RUN line that follows
    it: the trailing backslashes are dropped and the two pieces are joined
    with a single space. Returns the list of merged commands.
    """
    run_lines = []
    for line in lines:
        found = common.RUN_LINE_RE.match(line)
        if not found:
            continue
        text = found.group(1)
        if run_lines and run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + text
        else:
            run_lines.append(text)

    if verbose:
        log('Found {} RUN lines:'.format(len(run_lines)))
        for run_line in run_lines:
            log('  RUN: {}'.format(run_line))
    return run_lines
113
114
def build_run_list(test, run_lines, verbose=False):
    """Turn RUN lines of the form ``llc ... | FileCheck ...`` into Run objects.

    RUN lines that contain no pipe, whose first command is not llc, or whose
    second command is not FileCheck are skipped with a warning. Check prefixes
    used by more than one accepted RUN line are removed from every Run.

    Returns a ``(run_list, common_prefixes)`` tuple, where *common_prefixes*
    is the set of prefixes that were removed.
    """
    runs = []
    prefix_counts = collections.Counter()
    for run_line in run_lines:
        if '|' not in run_line:
            common.warn('Skipping unparseable RUN line: ' + run_line)
            continue

        # The guard above guarantees that the split yields two pieces.
        llc_cmd, filecheck_cmd = [
            piece.strip() for piece in run_line.split('|', 1)]
        common.verify_filecheck_prefixes(filecheck_cmd)

        if not llc_cmd.startswith('llc '):
            common.warn('Skipping non-llc RUN line: {}'.format(run_line),
                        test_file=test)
            continue
        if not filecheck_cmd.startswith('FileCheck '):
            common.warn(
                'Skipping non-FileChecked RUN line: {}'.format(run_line),
                test_file=test)
            continue

        triple_match = common.TRIPLE_ARG_RE.search(llc_cmd)
        triple = triple_match.group(1) if triple_match else None
        # If we find -march but not -mtriple, use that.
        march_match = common.MARCH_ARG_RE.search(llc_cmd)
        if march_match and not triple:
            triple = '{}--'.format(march_match.group(1))

        # Drop the leading "llc" and the input file placeholder.
        cmd_args = llc_cmd[len('llc'):].strip()
        for placeholder in ('< %s', '%s'):
            cmd_args = cmd_args.replace(placeholder, '')
        cmd_args = cmd_args.strip()

        check_prefixes = []
        for prefix_match in common.CHECK_PREFIX_RE.finditer(filecheck_cmd):
            check_prefixes.extend(prefix_match.group(1).split(','))
        if not check_prefixes:
            check_prefixes = ['CHECK']
        prefix_counts.update(check_prefixes)

        runs.append(Run(check_prefixes, cmd_args, triple))

    # Remove any common prefixes. We'll just leave those entirely alone.
    common_prefixes = set(prefix
                          for prefix, count in prefix_counts.items()
                          if count > 1)
    for run in runs:
        run.prefixes = [prefix for prefix in run.prefixes
                        if prefix not in common_prefixes]

    return runs, common_prefixes
165
166
def find_functions_with_one_bb(lines, verbose=False):
    """Return the names of the MIR functions that have exactly one block.

    Basic block labels are counted per function, where a function starts at
    each line matching MIR_FUNC_NAME_RE. Labels seen before the first function
    name are counted against a None entry.
    """
    # Each entry is [function name, number of basic block labels seen].
    tallies = [[None, 0]]
    for line in lines:
        name_match = MIR_FUNC_NAME_RE.match(line)
        if name_match:
            tallies.append([name_match.group('func'), 0])
        if MIR_BASIC_BLOCK_RE.match(line):
            tallies[-1][1] += 1
    return [name for name, block_count in tallies if block_count == 1]
184
185
def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
                                   func_dict, verbose):
    """Record the body of every MIR function found in *raw_tool_output*.

    Each body is stored in ``func_dict[prefix][func]`` for every prefix in
    *prefixes*. A warning is issued when a different body was already stored
    for the same prefix and function; the new body still replaces it.
    """
    for found in MIR_FUNC_RE.finditer(raw_tool_output):
        func, body = found.group('func', 'body')
        if verbose:
            log('Processing function: {}'.format(func))
            for body_line in body.splitlines():
                log('  {}'.format(body_line))
        for prefix in prefixes:
            bodies = func_dict[prefix]
            if func in bodies and bodies[func] != body:
                common.warn(
                    'Found conflicting asm for prefix: {}'.format(prefix),
                    test_file=test)
            bodies[func] = body
200
201
def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
                            single_bb, verbose=False):
    """Append the check lines for *func_name* to *output_lines*.

    For each run, checks are emitted for the first of its prefixes that has
    not been printed yet and that has a non-empty body recorded for
    *func_name*. A prefix with no recorded body for the function is skipped
    rather than raising KeyError.

    Returns *output_lines*, which is modified in place.
    """
    printed_prefixes = set()
    for run in run_list:
        for prefix in run.prefixes:
            if prefix in printed_prefixes:
                continue
            # The function may be missing from the output captured for this
            # prefix, so look it up without assuming that it is present.
            body = func_dict[prefix].get(func_name)
            if not body:
                continue
            printed_prefixes.add(prefix)
            log('Adding {} lines for {}'.format(prefix, func_name), verbose)
            add_check_lines(test, output_lines, prefix, func_name, single_bb,
                            body.splitlines())
            # Only one prefix is emitted per run.
            break
    return output_lines
220
221
def add_check_lines(test, output_lines, prefix, func_name, single_bb,
                    func_body):
    """Append FileCheck lines for one function body to *output_lines*.

    A ``<prefix>-LABEL: name: <func_name>`` line is emitted first, followed by
    one ``<prefix>:`` line per non-blank body line. Every virtual register is
    replaced by a FileCheck variable: ``[[NAME:%[0-9]+]]`` where it is defined
    and ``[[NAME]]`` wherever it appears afterwards.

    When *single_bb* is true the first element of *func_body* is removed from
    the list before anything else happens.
    """
    if single_bb:
        # Don't bother checking the basic block label for a single BB
        del func_body[0]

    if not func_body:
        common.warn(
            'Function has no instructions to check: {}'.format(func_name),
            test_file=test)
        return

    # A check comment, indented as far as the first body line is.
    indent = len(func_body[0]) - len(func_body[0].lstrip(' '))
    check = ' ' * indent + '; ' + prefix

    output_lines.append(check + '-LABEL: name: {}'.format(func_name))

    names_by_vreg = {}
    for raw_line in func_body:
        if not raw_line.strip():
            continue
        text = raw_line
        definition = VREG_DEF_RE.match(text)
        if definition:
            opcode = definition.group('opcode')
            for vreg_match in VREG_RE.finditer(definition.group('vregs')):
                vreg = vreg_match.group(1)
                name = mangle_vreg(opcode, names_by_vreg.values())
                names_by_vreg[vreg] = name
                # Only the first occurrence is the definition itself.
                text = text.replace(vreg, '[[{}:%[0-9]+]]'.format(name), 1)
        # The word boundary keeps "%1" from matching the start of "%10".
        for vreg, name in names_by_vreg.items():
            text = re.sub(r'{}\b'.format(vreg), '[[{}]]'.format(name), text)
        output_lines.append('{}: {}'.format(check, text[indent:]).rstrip())
256
257
def mangle_vreg(opcode, current_names):
    """Derive a FileCheck variable name from the opcode defining a vreg.

    The name is the opcode with a leading ``G_`` and a trailing ``_PSEUDO``
    removed, abbreviated when it is one of a few long common opcodes. An
    underscore is appended to a name that ends in a digit, and a numeric
    suffix is added when *current_names* already holds names with the same
    base.

    Args:
        opcode: The opcode of the defining instruction.
        current_names: Iterable of the variable names already in use.

    Returns:
        The variable name as a string.
    """
    base = opcode
    # Simplify some common prefixes and suffixes
    if opcode.startswith('G_'):
        base = base[len('G_'):]
    if opcode.endswith('_PSEUDO'):
        # A negative bound strips the suffix. A positive one would keep only
        # the first len('_PSEUDO') characters of the name instead.
        base = base[:-len('_PSEUDO')]
    # Shorten some common opcodes with long-ish names
    base = dict(IMPLICIT_DEF='DEF',
                GLOBAL_VALUE='GV',
                CONSTANT='C',
                FCONSTANT='C',
                MERGE_VALUES='MV',
                UNMERGE_VALUES='UV',
                INTRINSIC='INT',
                INTRINSIC_W_SIDE_EFFECTS='INT',
                INSERT_VECTOR_ELT='IVEC',
                EXTRACT_VECTOR_ELT='EVEC',
                SHUFFLE_VECTOR='SHUF').get(base, base)
    # Avoid ambiguity when opcodes end in numbers
    if base.rstrip('0123456789') != base:
        base += '_'

    # Number the name by how many existing names share the same base.
    taken = sum(1 for name in current_names
                if name.rstrip('0123456789') == base)
    if taken:
        return '{}{}'.format(base, taken)
    return base
288
289
def should_add_line_to_output(input_line, prefix_set):
    """Return False for a check line whose prefix is in *prefix_set*.

    Such lines are regenerated by this script, so the existing copy must not
    be carried over. Every other line yields True.
    """
    found = common.CHECK_RE.match(input_line)
    is_handled_check = bool(found) and found.group(1) in prefix_set
    return not is_handled_check
296
297
def update_test_file(args, test):
    """Regenerate the FileCheck lines of one test file, rewriting it in place.

    The file is read, llc is run once per usable RUN line to capture the MIR
    body of every function, and the file is then written back with the
    existing check lines for the handled prefixes replaced by fresh ones.

    Args:
        args: Parsed command line options. The attributes read here are
            verbose, update_only, remove_common_prefixes and llc (a callable
            taking the llc argument string and the test path).
        test: Path of the test file to update.

    The function returns early, leaving the file untouched, when the file was
    autogenerated by a different script, when update_only is set and the file
    is not marked as autogenerated, or when no triple can be found.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), args.verbose)
    with open(test) as fd:
        # Trailing whitespace, including the newline, is dropped here; the
        # newline is added back when the file is written.
        input_lines = [l.rstrip() for l in fd]

    script_name = os.path.basename(__file__)
    first_line = input_lines[0] if input_lines else ""
    # A first line that says "autogenerated" without naming this script means
    # the checks belong to some other tool.
    if 'autogenerated' in first_line and script_name not in first_line:
        common.warn("Skipping test which wasn't autogenerated by " +
                    script_name + ": " + test)
        return

    if args.update_only:
      if not first_line or 'autogenerated' not in first_line:
        common.warn("Skipping test which isn't autogenerated: " + test)
        return

    triple_in_ir = find_triple_in_ir(input_lines, args.verbose)
    run_lines = find_run_lines(test, input_lines, args.verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, args.verbose)

    simple_functions = find_functions_with_one_bb(input_lines, args.verbose)

    # Maps each handled prefix to a {function name: body text} dictionary.
    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict.update({prefix: dict()})
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), args.verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), args.verbose)

        # Note that llc is run before the triple check below.
        raw_tool_output = args.llc(llc_args, test)
        if not triple_in_cmd and not triple_in_ir:
            common.warn('No triple found: skipping file', test_file=test)
            return

        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, args.verbose)

    # The rest of the function walks the input with a small state machine.
    # The state records which part of the file the current line belongs to.
    state = 'toplevel'
    func_name = None
    # Existing check lines using one of these prefixes are dropped.
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), args.verbose)

    if args.remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        common.warn('Ignoring common prefixes: {}'.format(common_prefixes),
             test_file=test)

    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char, script_name))
    output_lines = []
    output_lines.append(autogenerated_note)

    for input_line in input_lines:
        # The note was already emitted above, so drop any existing copy.
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            # Outside any function: look for the start of an IR function or
            # of a YAML document. Lines in this state are always kept.
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            # Inside a YAML document, before its "name:" key has been seen.
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            # Between the "name:" key and the "body: |" key.
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                # Otherwise the checks go right after the "body: |" line.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)
        elif state == 'mir function prefix':
            # Single-block body: the checks are inserted just before the
            # first line that MIR_PREFIX_DATA_RE does not match.
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            # Copy the body through until the document ends.
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            # After an IR "define" line: the checks are inserted just before
            # the first line that is neither a comment nor blank.
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            # Copy the IR body through until its closing brace.
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)


    log('Writing {} lines to {}...'.format(len(output_lines), test), args.verbose)

    # Written as bytes so that every line ends in a bare "\n".
    with open(test, 'wb') as fd:
        fd.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])
428
429
def main():
    """Parse the command line and update every test file it names.

    Each positional argument is expanded as a glob pattern. If updating a
    file raises, a warning naming the file is printed and the exception is
    re-raised.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='Show verbose output')
    parser.add_argument(
        '--llc-binary', dest='llc', default='llc', type=LLC,
        help='The "llc" binary to generate the test case with')
    parser.add_argument(
        '--remove-common-prefixes', action='store_true',
        help='Remove existing check lines whose prefixes are '
             'shared between multiple commands')
    parser.add_argument(
        '-u', '--update-only', action='store_true',
        help='Only update test if it was already autogened')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    # Expand every pattern before any file is touched.
    test_paths = []
    for pattern in args.tests:
        test_paths.extend(glob.glob(pattern))

    for test in test_paths:
        try:
            update_test_file(args, test)
        except Exception:
            common.warn('Error processing file', test_file=test)
            raise


if __name__ == '__main__':
    main()
456