#!/usr/bin/env python

"""Updates FileCheck checks in MIR tests.

This script is a utility to update MIR based tests with new FileCheck
patterns.

The checks added by this script will cover the entire body of each
function it handles. Virtual registers used are given names via
FileCheck patterns, so if you do want to check a subset of the body it
should be straightforward to trim out the irrelevant parts. None of
the YAML metadata will be checked, other than function names.

If there are multiple llc commands in a test, the full set of checks
will be repeated for each different check pattern. Checks for patterns
that are common between different commands will be left as-is by
default, or removed if the --remove-common-prefixes flag is provided.
"""

from __future__ import print_function

import argparse
import collections
import glob
import os
import re
import subprocess
import sys

from UpdateTestChecks import common

# A `name: <func>` line in the YAML metadata of a MIR document.
MIR_FUNC_NAME_RE = re.compile(r' *name: *(?P<func>[A-Za-z0-9_.-]+)')
# The `body: |` line that introduces the machine-code body of a function.
MIR_BODY_BEGIN_RE = re.compile(r' *body: *\|')
# A basic block label such as `bb.0:` or `bb.1.entry:`.
MIR_BASIC_BLOCK_RE = re.compile(r' *bb\.[0-9]+.*:$')
# A virtual register (`%N`), optionally followed by `:class` and/or `(type)`.
VREG_RE = re.compile(r'(%[0-9]+)(?::[a-z0-9_]+)?(?:\([<>a-z0-9 ]+\))?')
# Instruction flags that may appear between `=` and the opcode.
MI_FLAGS_STR= (
    r'(frame-setup |frame-destroy |nnan |ninf |nsz |arcp |contract |afn '
    r'|reassoc |nuw |nsw |exact |fpexcept )*')
# An instruction that defines one or more virtual registers.
VREG_DEF_RE = re.compile(
    r'^ *(?P<vregs>{0}(?:, {0})*) = '
    r'{1}(?P<opcode>[A-Zt][A-Za-z0-9_]+)'.format(VREG_RE.pattern, MI_FLAGS_STR))
# Lines that are still part of the "prefix" of a single-block MIR body:
# comments, the block label, lower-case `key:` lines, and blank lines.
MIR_PREFIX_DATA_RE = re.compile(r'^ *(;|bb.[0-9].*: *$|[a-z]+:( |$)|$)')

# An LLVM IR `define ... @func(` line.
IR_FUNC_NAME_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[A-Za-z0-9_.]+)\s*\(')
# Lines between an IR `define` and its body: comments and blank lines.
IR_PREFIX_DATA_RE = re.compile(r'^ *(;|$)')

# A whole MIR document in llc's output, from `---` to `...`, capturing the
# function name and the text of its body.
MIR_FUNC_RE = re.compile(
    r'^---$'
    r'\n'
    r'^ *name: *(?P<func>[A-Za-z0-9_.-]+)$'
    r'.*?'
    r'^ *body: *\|\n'
    r'(?P<body>.*?)\n'
    r'^\.\.\.$',
    flags=(re.M | re.S))

# Short names used by mangle_vreg() for opcodes with long-ish names. Keys are
# opcodes with any `G_` prefix / `_PSEUDO` suffix already removed.
_VREG_NAME_ABBREVIATIONS = dict(IMPLICIT_DEF='DEF',
                                GLOBAL_VALUE='GV',
                                CONSTANT='C',
                                FCONSTANT='C',
                                MERGE_VALUES='MV',
                                UNMERGE_VALUES='UV',
                                INTRINSIC='INT',
                                INTRINSIC_W_SIDE_EFFECTS='INT',
                                INSERT_VECTOR_ELT='IVEC',
                                EXTRACT_VECTOR_ELT='EVEC',
                                SHUFFLE_VECTOR='SHUF')


class LLC:
    """Callable wrapper around an llc binary.

    Used as the argparse `type` for --llc-binary, so it is constructed from
    the path given on the command line.
    """

    def __init__(self, bin):
        self.bin = bin

    def __call__(self, args, ir):
        """Run llc with `args`, feeding the file `ir` on stdin.

        `-x mir` is appended to the arguments when `ir` ends in `.mir`.
        Returns llc's stdout as text with `\\r\\n` normalized to `\\n`.
        Raises subprocess.CalledProcessError if llc exits non-zero.
        """
        if ir.endswith('.mir'):
            args = '{} -x mir'.format(args)
        with open(ir) as ir_file:
            # NOTE: the command is built from the test's RUN line and run
            # through the shell, so only run this on trusted test files.
            stdout = subprocess.check_output('{} {}'.format(self.bin, args),
                                             shell=True, stdin=ir_file)
            if sys.version_info[0] > 2:
                stdout = stdout.decode()
            # Fix line endings to unix LF style.
            stdout = stdout.replace('\r\n', '\n')
        return stdout


class Run:
    """One llc RUN line: its check prefixes, llc arguments and triple."""

    def __init__(self, prefixes, cmd_args, triple):
        self.prefixes = prefixes
        self.cmd_args = cmd_args
        self.triple = triple

    def __getitem__(self, index):
        # Lets a Run be unpacked as `prefixes, cmd_args, triple = run`.
        return [self.prefixes, self.cmd_args, self.triple][index]


def log(msg, verbose=True):
    """Print `msg` to stderr when `verbose` is true."""
    if verbose:
        print(msg, file=sys.stderr)


def find_triple_in_ir(lines, verbose=False):
    """Return the first triple matched by common.TRIPLE_IR_RE, or None."""
    for l in lines:
        m = common.TRIPLE_IR_RE.match(l)
        if m:
            return m.group(1)
    return None


def find_run_lines(test, lines, verbose=False):
    """Collect the RUN lines of a test, joining backslash continuations.

    Returns a list of strings, one per logical RUN command.
    """
    raw_lines = [m.group(1)
                 for m in [common.RUN_LINE_RE.match(l) for l in lines] if m]
    run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
    for l in raw_lines[1:]:
        if run_lines[-1].endswith("\\"):
            run_lines[-1] = run_lines[-1].rstrip("\\") + " " + l
        else:
            run_lines.append(l)
    if verbose:
        log('Found {} RUN lines:'.format(len(run_lines)))
        for l in run_lines:
            log(' RUN: {}'.format(l))
    return run_lines


def build_run_list(test, run_lines, verbose=False):
    """Turn RUN lines into Run objects.

    Only lines of the form `llc ... | FileCheck ...` are kept; anything else
    is skipped with a warning. Prefixes used by more than one RUN line are
    stripped from every Run and returned separately.

    Returns a `(run_list, common_prefixes)` tuple, where `common_prefixes`
    is a set.
    """
    run_list = []
    all_prefixes = []
    for l in run_lines:
        if '|' not in l:
            common.warn('Skipping unparseable RUN line: ' + l, test_file=test)
            continue

        commands = [cmd.strip() for cmd in l.split('|', 1)]
        llc_cmd = commands[0]
        filecheck_cmd = commands[1] if len(commands) > 1 else ''
        common.verify_filecheck_prefixes(filecheck_cmd)

        if not llc_cmd.startswith('llc '):
            common.warn('Skipping non-llc RUN line: {}'.format(l),
                        test_file=test)
            continue
        if not filecheck_cmd.startswith('FileCheck '):
            common.warn('Skipping non-FileChecked RUN line: {}'.format(l),
                        test_file=test)
            continue

        triple = None
        m = common.TRIPLE_ARG_RE.search(llc_cmd)
        if m:
            triple = m.group(1)
        # If we find -march but not -mtriple, use that.
        m = common.MARCH_ARG_RE.search(llc_cmd)
        if m and not triple:
            triple = '{}--'.format(m.group(1))

        # Drop the `llc` itself and the input-file placeholder; the test file
        # is fed on stdin by LLC.__call__ instead.
        cmd_args = llc_cmd[len('llc'):].strip()
        cmd_args = cmd_args.replace('< %s', '').replace('%s', '').strip()

        check_prefixes = [
            item
            for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
            for item in m.group(1).split(',')]
        if not check_prefixes:
            check_prefixes = ['CHECK']
        all_prefixes += check_prefixes

        run_list.append(Run(check_prefixes, cmd_args, triple))

    # Remove any common prefixes. We'll just leave those entirely alone.
    prefix_counts = collections.Counter(all_prefixes)
    common_prefixes = set(prefix for prefix, count in prefix_counts.items()
                          if count > 1)
    for run in run_list:
        run.prefixes = [p for p in run.prefixes if p not in common_prefixes]

    return run_list, common_prefixes


def find_functions_with_one_bb(lines, verbose=False):
    """Return the names of MIR functions that contain exactly one basic block.

    A function starts at a `name:` line and its blocks are counted by
    `bb.N...:` labels up to the next `name:` line (or the end of input).
    """
    result = []
    cur_func = None
    bbs = 0
    for line in lines:
        m = MIR_FUNC_NAME_RE.match(line)
        if m:
            if bbs == 1:
                result.append(cur_func)
            cur_func = m.group('func')
            bbs = 0
        m = MIR_BASIC_BLOCK_RE.match(line)
        if m:
            bbs += 1
    # Account for the last function in the file.
    if bbs == 1:
        result.append(cur_func)
    return result


def build_function_body_dictionary(test, raw_tool_output, triple, prefixes,
                                   func_dict, verbose):
    """Record the body of every MIR function in `raw_tool_output`.

    For each function found, `func_dict[prefix][func]` is set to the body
    text for every prefix in `prefixes`. A warning is emitted when a prefix
    already holds a different body for the same function; the new body still
    replaces the old one.
    """
    for m in MIR_FUNC_RE.finditer(raw_tool_output):
        func = m.group('func')
        body = m.group('body')
        if verbose:
            log('Processing function: {}'.format(func))
            for l in body.splitlines():
                log(' {}'.format(l))
        for prefix in prefixes:
            if func in func_dict[prefix] and func_dict[prefix][func] != body:
                common.warn(
                    'Found conflicting asm for prefix: {}'.format(prefix),
                    test_file=test)
            func_dict[prefix][func] = body


def add_checks_for_function(test, output_lines, run_list, func_dict, func_name,
                            single_bb, verbose=False):
    """Append check lines for `func_name` to `output_lines`.

    For each run, checks are emitted for the first of its prefixes that has
    not been printed yet and has a recorded body for the function. Prefixes
    with no (or an empty) body for this function are skipped.

    Returns `output_lines`.
    """
    printed_prefixes = set()
    for run in run_list:
        for prefix in run.prefixes:
            if prefix in printed_prefixes:
                continue
            # The function may be absent from this run's llc output; skip it
            # rather than failing with a KeyError.
            body = func_dict[prefix].get(func_name)
            if not body:
                continue
            printed_prefixes.add(prefix)
            log('Adding {} lines for {}'.format(prefix, func_name), verbose)
            add_check_lines(test, output_lines, prefix, func_name, single_bb,
                            body.splitlines())
            break
    return output_lines


def add_check_lines(test, output_lines, prefix, func_name, single_bb,
                    func_body):
    """Append `prefix` check lines covering `func_body` to `output_lines`.

    `func_body` is a list of body lines and is modified in place when
    `single_bb` is true (its first line, the block label, is dropped).
    Virtual register definitions are rewritten to FileCheck variable
    definitions and later uses to references to those variables.
    """
    if single_bb and func_body:
        # Don't bother checking the basic block label for a single BB
        func_body.pop(0)

    if not func_body:
        common.warn(
            'Function has no instructions to check: {}'.format(func_name),
            test_file=test)
        return

    first_line = func_body[0]
    indent = len(first_line) - len(first_line.lstrip(' '))
    # A check comment, indented the appropriate amount
    check = '{:>{}}; {}'.format('', indent, prefix)

    output_lines.append('{}-LABEL: name: {}'.format(check, func_name))

    # Maps a vreg such as `%3` to the FileCheck variable name chosen for it.
    vreg_map = {}
    for func_line in func_body:
        if not func_line.strip():
            continue
        m = VREG_DEF_RE.match(func_line)
        if m:
            for vreg in VREG_RE.finditer(m.group('vregs')):
                name = mangle_vreg(m.group('opcode'), vreg_map.values())
                vreg_map[vreg.group(1)] = name
                func_line = func_line.replace(
                    vreg.group(1), '[[{}:%[0-9]+]]'.format(name), 1)
        # `\b` keeps `%1` from matching the front of `%10`.
        for number, name in vreg_map.items():
            func_line = re.sub(r'{}\b'.format(number), '[[{}]]'.format(name),
                               func_line)
        check_line = '{}: {}'.format(check, func_line[indent:]).rstrip()
        output_lines.append(check_line)


def mangle_vreg(opcode, current_names):
    """Pick a FileCheck variable name for a vreg defined by `opcode`.

    The name is derived from the opcode (without a `G_` prefix or `_PSEUDO`
    suffix, and abbreviated for some common opcodes). If names with the same
    base are already present in `current_names`, the count of those names is
    appended to keep the new name distinct.
    """
    base = opcode
    # Simplify some common prefixes and suffixes
    if opcode.startswith('G_'):
        base = base[len('G_'):]
    if opcode.endswith('_PSEUDO'):
        base = base[:-len('_PSEUDO')]
    # Shorten some common opcodes with long-ish names
    base = _VREG_NAME_ABBREVIATIONS.get(base, base)
    # Avoid ambiguity when opcodes end in numbers
    if len(base.rstrip('0123456789')) < len(base):
        base += '_'

    i = 0
    for name in current_names:
        if name.rstrip('0123456789') == base:
            i += 1
    if i:
        return '{}{}'.format(base, i)
    return base


def should_add_line_to_output(input_line, prefix_set):
    """Return False for check lines whose prefix is in `prefix_set`."""
    # Skip any check lines that we're handling.
    m = common.CHECK_RE.match(input_line)
    if m and m.group(1) in prefix_set:
        return False
    return True


def update_test_file(args, test):
    """Regenerate the FileCheck lines of the test file `test` in place.

    `args` is the parsed command line; this reads `args.verbose`,
    `args.update_only`, `args.remove_common_prefixes` and calls `args.llc`.
    The file is left untouched when it was autogenerated by a different
    script, when --update-only is given and it is not autogenerated, or when
    no target triple can be determined for one of its runs.
    """
    log('Scanning for RUN lines in test file: {}'.format(test), args.verbose)
    with open(test) as fd:
        input_lines = [l.rstrip() for l in fd]

    script_name = os.path.basename(__file__)
    first_line = input_lines[0] if input_lines else ""
    if 'autogenerated' in first_line and script_name not in first_line:
        common.warn("Skipping test which wasn't autogenerated by " +
                    script_name + ": " + test)
        return

    if args.update_only:
        if not first_line or 'autogenerated' not in first_line:
            common.warn("Skipping test which isn't autogenerated: " + test)
            return

    triple_in_ir = find_triple_in_ir(input_lines, args.verbose)
    run_lines = find_run_lines(test, input_lines, args.verbose)
    run_list, common_prefixes = build_run_list(test, run_lines, args.verbose)

    simple_functions = find_functions_with_one_bb(input_lines, args.verbose)

    func_dict = {}
    for run in run_list:
        for prefix in run.prefixes:
            func_dict[prefix] = {}
    for prefixes, llc_args, triple_in_cmd in run_list:
        log('Extracted LLC cmd: llc {}'.format(llc_args), args.verbose)
        log('Extracted FileCheck prefixes: {}'.format(prefixes), args.verbose)

        # Bail out before spending time running llc on this command.
        if not triple_in_cmd and not triple_in_ir:
            common.warn('No triple found: skipping file', test_file=test)
            return

        raw_tool_output = args.llc(llc_args, test)
        build_function_body_dictionary(test, raw_tool_output,
                                       triple_in_cmd or triple_in_ir,
                                       prefixes, func_dict, args.verbose)

    state = 'toplevel'
    func_name = None
    prefix_set = set([prefix for run in run_list for prefix in run.prefixes])
    log('Rewriting FileCheck prefixes: {}'.format(prefix_set), args.verbose)

    if args.remove_common_prefixes:
        prefix_set.update(common_prefixes)
    elif common_prefixes:
        common.warn('Ignoring common prefixes: {}'.format(common_prefixes),
                    test_file=test)

    comment_char = '#' if test.endswith('.mir') else ';'
    autogenerated_note = ('{} NOTE: Assertions have been autogenerated by '
                          'utils/{}'.format(comment_char, script_name))
    output_lines = []
    output_lines.append(autogenerated_note)

    # Walk the input with a small state machine. Existing check lines for the
    # prefixes being rewritten are dropped, and fresh checks are inserted at
    # the start of each function body.
    for input_line in input_lines:
        if input_line == autogenerated_note:
            continue

        if state == 'toplevel':
            m = IR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'ir function prefix'
                func_name = m.group('func')
            if input_line.rstrip('| \r\n') == '---':
                state = 'document'
            output_lines.append(input_line)
        elif state == 'document':
            m = MIR_FUNC_NAME_RE.match(input_line)
            if m:
                state = 'mir function metadata'
                func_name = m.group('func')
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function metadata':
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
            m = MIR_BODY_BEGIN_RE.match(input_line)
            if m:
                if func_name in simple_functions:
                    # If there's only one block, put the checks inside it
                    state = 'mir function prefix'
                    continue
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)
        elif state == 'mir function prefix':
            m = MIR_PREFIX_DATA_RE.match(input_line)
            if not m:
                # First real instruction: the checks go right before it.
                state = 'mir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=True,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'mir function body':
            if input_line.strip() == '...':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function prefix':
            m = IR_PREFIX_DATA_RE.match(input_line)
            if not m:
                state = 'ir function body'
                add_checks_for_function(test, output_lines, run_list,
                                        func_dict, func_name, single_bb=False,
                                        verbose=args.verbose)

            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)
        elif state == 'ir function body':
            if input_line.strip() == '}':
                state = 'toplevel'
                func_name = None
            if should_add_line_to_output(input_line, prefix_set):
                output_lines.append(input_line)

    log('Writing {} lines to {}...'.format(len(output_lines), test),
        args.verbose)

    with open(test, 'wb') as fd:
        fd.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines])


def main():
    """Parse the command line and update every test matched by the globs."""
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Show verbose output')
    parser.add_argument('--llc-binary', dest='llc', default='llc', type=LLC,
                        help='The "llc" binary to generate the test case with')
    parser.add_argument('--remove-common-prefixes', action='store_true',
                        help='Remove existing check lines whose prefixes are '
                             'shared between multiple commands')
    parser.add_argument('-u', '--update-only', action='store_true',
                        help='Only update test if it was already autogened')
    parser.add_argument('tests', nargs='+')
    args = parser.parse_args()

    test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
    for test in test_paths:
        try:
            update_test_file(args, test)
        except Exception:
            # Say which file failed, then let the error propagate.
            common.warn('Error processing file', test_file=test)
            raise


if __name__ == '__main__':
    main()