from __future__ import print_function
import re
import string
import subprocess
import sys
import copy

# Python 3 dropped string.expandtabs(); provide a minimal shim so callers can
# keep using string.expandtabs(text, tabsize) on both major versions.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string

##### Common utilities for update_*test_checks.py


# Module-wide verbosity flag; set by parse_commandline_args() and read by
# debug().
_verbose = False

def parse_commandline_args(parser):
  """Add the options shared by all update scripts and parse the command line.

  Registers -v/--verbose and -u/--update-only on `parser`, parses the
  arguments, records the verbosity in the module-level `_verbose` flag and
  returns the parsed namespace.
  """
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('-u', '--update-only', action='store_true',
                      help='Only update test if it was already autogened')
  args = parser.parse_args()
  global _verbose
  _verbose = args.verbose
  return args

def should_add_line_to_output(input_line, prefix_set):
  """Return False for input lines that must not be copied to the output.

  Blank comment lines (a lone ';') and existing CHECK lines whose prefix is in
  `prefix_set` are dropped; everything else is kept.
  """
  # Skip any blank comment lines in the IR.
  if input_line.strip() == ';':
    return False
  # Skip any blank lines in the IR.
  #if input_line.strip() == '':
  #  return False
  # And skip any CHECK lines. We're building our own.
  m = CHECK_RE.match(input_line)
  if m and m.group(1) in prefix_set:
    return False

  return True

# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir):
  """Run `exe` with `cmd_args`, feeding the file `ir` on stdin.

  `cmd_args` may be a list (run without a shell) or a string (run through the
  shell). Returns the tool's stdout as text with CRLF line endings converted
  to LF. subprocess.check_output raises CalledProcessError on a non-zero exit.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    if isinstance(cmd_args, list):
      stdout = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
    else:
      stdout = subprocess.check_output(exe + ' ' + cmd_args,
                                       shell=True, stdin=ir_file)
    if sys.version_info[0] > 2:
      stdout = stdout.decode()
  # Fix line endings to unix LF style.
  return stdout.replace('\r\n', '\n')

##### LLVM IR parser

RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
# Both pattern fragments are raw strings so that '\s' reaches the regex engine
# untouched instead of being an invalid string escape.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')

OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(| \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)


def error(msg, test_file=None):
  """Print an 'ERROR: ...' message to stderr, optionally naming the test file."""
  if test_file:
    msg = '{}: {}'.format(msg, test_file)
  print('ERROR: {}'.format(msg), file=sys.stderr)

def warn(msg, test_file=None):
  """Print a 'WARNING: ...' message to stderr, optionally naming the test file."""
  if test_file:
    msg = '{}: {}'.format(msg, test_file)
  print('WARNING: {}'.format(msg), file=sys.stderr)

def debug(*args, **kwargs):
  """print() to stderr (unless file= is given), but only in verbose mode."""
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  if 'file' not in kwargs:
    kwargs['file'] = sys.stderr
  if _verbose:
    print(*args, **kwargs)

def find_run_lines(test, lines):
  """Collect the RUN commands found in `lines`.

  A RUN line ending in a backslash is joined with the following RUN line.
  `test` is only used for diagnostics. Returns the list of joined commands.
  """
  debug('Scanning for RUN lines in test file:', test)
  raw_lines = [m.group(1)
               for m in [RUN_LINE_RE.match(l) for l in lines] if m]
  run_lines = []
  for l in raw_lines:
    # A trailing backslash on the previous command means this line continues it.
    if run_lines and run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for l in run_lines:
    debug(' RUN: {}'.format(l))
  return run_lines

def scrub_body(body):
  """Normalize whitespace in `body` and return the result."""
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  body = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation.
  body = string.expandtabs(body, 2)
  # Strip trailing whitespace.
  body = SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', body)
  return body

def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber(body, *scrubber_args)` with the extra-scrub flag set.

  When `scrubber_args` is non-empty, the arguments are deep-copied and the
  copy's first element gets `extra_scrub = extra`, so the caller's argument
  objects are never mutated.
  """
  if scrubber_args:
    local_args = copy.deepcopy(scrubber_args)
    local_args[0].extra_scrub = extra
    return scrubber(body, *local_args)
  return scrubber(body, *scrubber_args)

# Build up a dictionary of all the function bodies.
class function_body(object):
  """Scrubbed output recorded for one function under one check prefix."""

  def __init__(self, string, extra, args_and_sig):
    # Scrubbed body text; this is what __str__ returns.
    self.scrub = string
    # Body text produced with the scrubber's extra_scrub flag enabled.
    self.extrascrub = extra
    # Text recorded after the function name (argument list / signature).
    self.args_and_sig = args_and_sig

  def is_same_except_arg_names(self, extrascrub, args_and_sig):
    """Return True if the other body differs from this one only in arg names.

    The signatures are compared with every IR value name removed; the bodies
    are compared with the argument names removed and IR comments stripped.
    """
    arg_names = set()
    def drop_arg_names(match):
      arg_names.add(match.group(2))
      return match.group(1) + match.group(3)
    def repl_arg_names(match):
      if match.group(2) in arg_names:
        return match.group(1) + match.group(3)
      return match.group(1) + match.group(2) + match.group(3)
    # Both signature substitutions must run before the bodies are processed so
    # that arg_names holds the argument names of both versions.
    ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
    ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
    if ans0 != ans1:
      return False
    es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
    es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
    es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
    es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
    return es0 == es1

  def __str__(self):
    return self.scrub

def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
  """Record every function found in `raw_tool_output` under each prefix.

  `function_re` must provide the named groups 'func' and 'body', and may
  provide 'args_and_sig' and 'analysis'. `func_dict` maps prefix -> function
  name -> function_body and is updated in place. An entry is set to None when
  two run lines sharing a prefix produce conflicting output for a function.
  """
  for m in function_re.finditer(raw_tool_output):
    groups = m.groupdict()
    func = m.group('func')
    body = m.group('body')
    # Determine if we print arguments, the opening brace, or nothing after the function name
    if record_args and 'args_and_sig' in groups:
      args_and_sig = scrub_body(m.group('args_and_sig').strip())
    elif 'args_and_sig' in groups:
      args_and_sig = '('
    else:
      args_and_sig = ''
    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
    if 'analysis' in groups:
      analysis = m.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for l in scrubbed_body.splitlines():
        print(' ' + l, file=sys.stderr)
    for prefix in prefixes:
      if func in func_dict[prefix]:
        existing = func_dict[prefix][func]
        conflicts = (str(existing) != scrubbed_body or
                     (existing and existing.args_and_sig != args_and_sig))
        if conflicts:
          if existing and existing.is_same_except_arg_names(scrubbed_extra, args_and_sig):
            # Only argument names differ: switch to the extra-scrubbed body.
            existing.scrub = scrubbed_extra
            existing.args_and_sig = args_and_sig
            continue
          if prefix == prefixes[-1]:
            # Warn, then fall through so the newest output replaces the entry.
            warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
          else:
            func_dict[prefix][func] = None
            continue

      func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)

##### Generator of LLVM IR CHECK lines

SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string
IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)')

NAMELESS_PREFIX = "TMP"

# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Return the upper-case FileCheck variable name for IR value `var`."""
  if var.isdigit():
    var = NAMELESS_PREFIX + var
  var = var.replace('.', '_')
  var = var.replace('-', '_')
  return var.upper()


# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the FileCheck pattern that defines the variable for `var`."""
  return '[[' + get_value_name(var) + ':%.*]]'


# Use a FileCheck variable.
def get_value_use(var):
  """Return the FileCheck pattern that uses the variable for `var`."""
  return '[[' + get_value_name(var) + ']]'

# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
  """Rewrite `lines` in place, replacing IR values by FileCheck variables.

  IR comments are always stripped. When `is_analyze` is true the value names
  are left untouched. `vars_seen` is updated with every newly defined name.
  Returns `lines`.
  """
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = match.group(2)
    if NAMELESS_PREFIX.lower() in var.lower():
      warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,))
    if var in vars_seen:
      rv = get_value_use(var)
    else:
      vars_seen.add(var)
      rv = get_value_definition(var)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(3)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    if is_analyze:
      lines[i] = scrubbed_line
    else:
      lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
  return lines


def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for `func_name` to `output_lines`.

  `prefix_list` is a sequence whose items hold the list of check prefixes of
  one run line at index 0. `check_label_format` is a %-format taking
  (prefix, function name, args_and_sig). `is_asm` selects the plain ASM
  output; `is_analyze` keeps IR value names instead of genericizing them.
  """
  # prefix_blacklist are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_blacklist = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(func_name not in func_dict[checkprefix] for checkprefix in checkprefixes):
      prefix_blacklist |= set(checkprefixes)

  # prefix_blacklist is constructed, we can now emit the output
  for p in prefix_list:
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is blacklisted.
      if checkprefix in prefix_blacklist:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        # An empty body has no first line to check; emit only the label then.
        if func_body:
          output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
          for func_line in func_body[1:]:
            if func_line.strip() == '':
              output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
            else:
              output_lines.append('%s %s-NEXT: %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s: %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them.
        if is_blank_line:
          output_lines.append('{} {}: {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT: {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      break

def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
  """Append IR check lines for `func_name`.

  `function_sig` adds a 'define' matcher to the label line. `preserve_names`
  is forwarded as add_checks' `is_analyze` flag, which keeps IR value names.
  """
  # Label format is based on IR string.
  function_def_regex = 'define {{[^@]+}}' if function_sig else ''
  check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, False, preserve_names)

def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Append analysis check lines for `func_name` (value names are kept)."""
  check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True)


def check_prefix(prefix):
  """Warn if `prefix` is not a valid FileCheck prefix."""
  if not PREFIX_RE.match(prefix):
    hint = ""
    if ',' in prefix:
      hint = " Did you mean '--check-prefixes=" + prefix + "'?"
    warn(("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." + hint) %
         (prefix,))


def verify_filecheck_prefixes(fc_cmd):
  """Warn about invalid or duplicated prefixes in the FileCheck command."""
  fc_cmd_parts = fc_cmd.split()
  for part in fc_cmd_parts:
    if "check-prefix=" in part:
      prefix = part.split('=', 1)[1]
      check_prefix(prefix)
    elif "check-prefixes=" in part:
      prefixes = part.split('=', 1)[1].split(',')
      for prefix in prefixes:
        check_prefix(prefix)
        if prefixes.count(prefix) > 1:
          warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))


def get_autogennote_suffix(parser, args):
  """Return the ' UTC_ARGS: ...' suffix describing the non-default options.

  Returns an empty string when every recorded option has its default value.
  """
  note_parts = []
  for action in parser._actions:
    if not hasattr(args, action.dest):
      continue  # Ignore options such as --help that aren't included in args
    # Ignore parameters such as paths to the binary or the list of tests
    if action.dest in ('tests', 'update_only', 'opt_binary', 'llc_binary',
                       'clang', 'opt', 'llvm_bin', 'verbose'):
      continue
    value = getattr(args, action.dest)
    if action.const is not None:  # action stores a constant (usually True/False)
      # Skip actions with different constant values (this happens with boolean
      # --foo/--no-foo options)
      if value != action.const:
        continue
    if parser.get_default(action.dest) == value:
      continue  # Don't add default values
    note_parts.append(action.option_strings[0])
    if action.const is None:  # action takes a parameter
      note_parts.append('%s' % value)
  if not note_parts:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(note_parts))


def check_for_command(line, parser, args, argv):
  """Apply a 'UTC_ARGS:' command found in `line`, if there is one.

  The command's arguments are appended to `argv` and the command line is
  re-parsed with the test file names filtered out. Returns the (possibly
  updated) `(args, argv)` pair.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if cmd_m:
    # split() without a separator drops the empty strings that an empty
    # command or repeated spaces would otherwise feed to argparse.
    cmd = cmd_m.group('cmd').split()
    argv = argv + cmd
    args = parser.parse_args([arg for arg in argv if arg not in args.tests])
  return args, argv