xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision aae413462fae16c481df31ff23b951c5df494a60)
1from __future__ import print_function
2
3import copy
4import glob
5import re
6import subprocess
7import sys
8
# Provide a `string.expandtabs(text, tabsize)` callable on every interpreter:
# Python 2 uses the real `string` module, while newer versions get a minimal
# stand-in class that exposes only `expandtabs` (bound to str.expandtabs).
if sys.version_info[0] <= 2:
  import string
else:
  class string:
    expandtabs = str.expandtabs
14
##### Common utilities for update_*test_checks.py


# Module-wide verbosity switch. It is assigned by parse_commandline_args() and
# consulted by debug().
_verbose = False

def parse_commandline_args(parser):
  """Register the options shared by the update scripts and parse the command line.

  Args:
    parser: an argparse-style parser; the shared options are added to it
      before `parser.parse_args()` is called (which reads sys.argv).

  Returns:
    The parsed arguments object. As a side effect the module-level `_verbose`
    flag is set from `args.verbose`.
  """
  global _verbose
  # (option strings, keyword arguments) in registration order. The order is
  # kept stable because it determines the order of the parser's actions.
  shared_options = (
      (('-v', '--verbose'),
       dict(action='store_true', help='Show verbose output')),
      (('-u', '--update-only'),
       dict(action='store_true',
            help='Only update test if it was already autogened')),
      (('--force-update',),
       dict(action='store_true',
            help='Update test even if it was autogened by a different script')),
      (('--enable',),
       dict(action='store_true', dest='enabled', default=True,
            help='Activate CHECK line generation from this point forward')),
      (('--disable',),
       dict(action='store_false', dest='enabled',
            help='Deactivate CHECK line generation from this point forward')),
  )
  for option_strings, settings in shared_options:
    parser.add_argument(*option_strings, **settings)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
35
36
class InputLineInfo(object):
  """Plain record for one input line.

  Stores the line text, its line number, and the `args` / `argv` values that
  were handed over together with it.
  """

  def __init__(self, line, line_number, args, argv):
    self.line, self.line_number = line, line_number
    self.args, self.argv = args, argv
43
44
class TestInfo(object):
  """State for one test file that is being (re)generated.

  Keeps the test path, the parser and the current args/argv, the input lines,
  the RUN lines found in them, the comment prefix, and the autogenerated-note
  header line that is written at the top of the output.
  """

  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix):
    self.parser = parser
    self.path = test
    self.args = args
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    # Without an explicit prefix, pick one from the file name: '#' for .mir
    # files and ';' for everything else.
    if comment_prefix is None:
      comment_prefix = '#' if self.path.endswith('.mir') else ';'
    self.comment_prefix = comment_prefix
    self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
    note = self.autogenerated_note_prefix + script_name
    self.test_autogenerated_note = note + get_autogennote_suffix(parser,
                                                                 self.args)

  def iterlines(self, output_lines):
    """Yield an InputLineInfo for every input line that needs processing.

    The autogenerated note is appended to `output_lines` first. Lines carrying
    a previous note are dropped. Each remaining line is passed through
    check_for_command(), which may update `self.args` / `self.argv`; while
    generation is disabled, lines are copied to `output_lines` unchanged
    instead of being yielded.
    """
    output_lines.append(self.test_autogenerated_note)
    for index, text in enumerate(self.input_lines):
      # Discard any previous script advertising.
      if text.startswith(self.autogenerated_note_prefix):
        continue
      updated = check_for_command(text, self.parser, self.args, self.argv)
      self.args, self.argv = updated
      if self.args.enabled:
        yield InputLineInfo(text, index, self.args, self.argv)
      else:
        output_lines.append(text)
76
77
def itertests(test_patterns, parser, script_name, comment_prefix=None):
  """Yield a TestInfo for each test file that should be processed.

  Args:
    test_patterns: iterable of glob patterns naming test files.
    parser: argument parser; `parse_args()` is called once per test file.
    script_name: name of the running script, looked up in an existing
      autogenerated note on the first line of the file.
    comment_prefix: forwarded to TestInfo.

  Patterns without matches produce a warning. A file is skipped (with a
  warning) when its first line carries a note from a different script and
  --force-update is not set, or when it carries no note and --update-only is
  set.
  """
  for pattern in test_patterns:
    # On Windows we must expand the patterns ourselves.
    matched_paths = glob.glob(pattern)
    if not matched_paths:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for test in matched_paths:
      with open(test) as f:
        input_lines = [raw.rstrip() for raw in f]
      args = parser.parse_args()
      argv = sys.argv[:]
      header = input_lines[0] if input_lines else ""
      if UTC_ADVERT in header:
        if script_name in header or args.force_update:
          # Pick up any UTC_ARGS recorded in the existing note.
          args, argv = check_for_command(header, parser, args, argv)
        else:
          warn("Skipping test which wasn't autogenerated by " + script_name, test)
          continue
      elif args.update_only:
        warn("Skipping test which isn't autogenerated: " + test)
        continue
      yield TestInfo(test, parser, script_name, input_lines, args, argv,
                     comment_prefix)
102
103
def should_add_line_to_output(input_line, prefix_set):
  """Tell whether `input_line` should be copied to the regenerated test.

  Returns False for a comment line that consists of a lone ';' and for check
  lines whose prefix (as captured by CHECK_RE) is in `prefix_set`, since those
  are regenerated. Everything else, including blank lines, is kept.
  """
  # Skip any blank comment lines in the IR.
  if input_line.strip() == ';':
    return False
  # And skip any CHECK lines. We're building our own.
  check_match = CHECK_RE.match(input_line)
  is_own_check = (check_match is not None and
                  check_match.group(1) in prefix_set)
  return not is_own_check
117
# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir):
  """Run `exe` with the file `ir` connected to its stdin and return its stdout.

  Args:
    exe: the executable to run.
    cmd_args: either a list of arguments (executed directly) or a single
      string (appended to `exe` and executed through the shell).
    ir: path of the file that is opened and passed as the tool's stdin.

  Returns:
    The captured stdout as text with '\\r\\n' line endings converted to '\\n'.
    A non-zero exit status surfaces as the exception raised by
    subprocess.check_output.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    via_shell = not isinstance(cmd_args, list)
    command = exe + ' ' + cmd_args if via_shell else [exe] + cmd_args
    captured = subprocess.check_output(command, shell=via_shell,
                                       stdin=ir_file)
  if sys.version_info[0] > 2:
    captured = captured.decode()
  # Fix line endings to unix LF style.
  return captured.replace('\r\n', '\n')
133
##### LLVM IR parser
# A RUN line: a '//', ';' or '#' comment leader followed by 'RUN:'; group 1 is
# the command text.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A '-check-prefix' / '--check-prefixes' option separated from its value by
# '=' or a space; group 1 is the value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A valid prefix: letters, digits, underscores and hyphens only.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# A check line in a comment; group 1 is the prefix without a trailing
# -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

# Marker that introduces script options embedded in a test file, and the
# regex that extracts everything after it as the named group 'cmd'.
UTC_ARGS_KEY = 'UTC_ARGS:'
# Both pattern pieces are raw strings so that '\s' is never interpreted as a
# (invalid) string escape sequence.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
# Text of the note identifying an autogenerated test; the script name is
# appended to it.
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '

# A full IR function definition. Named groups: 'func' (name after '@'),
# 'args_and_sig' (from the opening parenthesis up to the '{') and 'body'
# (everything up to a line consisting of '}').
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# An analysis report header of the form "'<analysis>' for function '<func>':"
# followed by the report body. Named groups: 'analysis', 'func', 'body'.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of an IR function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
# 'target triple = "..."' in IR, and -mtriple / -march options on a command
# line; group 1 is the value in each case.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Patterns used when scrubbing tool output.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Trailing blanks and '#<number>' tokens at the end of a line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
# NOTE(review): the '+' here quantifies a zero-width lookbehind; it is kept
# unchanged to preserve the compiled pattern.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)
168
169
def error(msg, test_file=None):
  """Print `msg` to stderr prefixed with 'ERROR: '.

  When `test_file` is truthy the message becomes '<msg>: <test_file>'.
  """
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
174
def warn(msg, test_file=None):
  """Print `msg` to stderr prefixed with 'WARNING: '.

  When `test_file` is truthy the message becomes '<msg>: <test_file>'.
  """
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
179
def debug(*args, **kwargs):
  """print() wrapper that only produces output when `_verbose` is set.

  Output goes to sys.stderr unless the caller supplies its own `file`.
  """
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  if _verbose:
    print(*args, **kwargs)
186
def find_run_lines(test, lines):
  """Collect the RUN commands found in `lines`.

  Args:
    test: name of the test file; only used in debug output.
    lines: iterable of the test file's lines.

  Returns:
    A list of command strings. A RUN line ending in a backslash is merged
    with the following RUN line: the trailing backslashes are removed and the
    two parts are joined with a single space.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for line in lines:
    found = RUN_LINE_RE.match(line)
    if not found:
      continue
    fragment = found.group(1)
    # A trailing backslash means the command continues on this RUN line.
    if run_lines and run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + fragment
    else:
      run_lines.append(fragment)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for run_line in run_lines:
    debug('  RUN: {}'.format(run_line))
  return run_lines
201
def scrub_body(body):
  """Normalize the whitespace of `body` and return the result.

  Three passes are applied in order: SCRUB_WHITESPACE_RE matches are replaced
  by a single space, tabs are expanded with a tab size of 2, and trailing
  whitespace matched by SCRUB_TRAILING_WHITESPACE_TEST_RE is removed.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation.
  expanded = string.expandtabs(collapsed, 2)
  # Strip trailing whitespace.
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', expanded)
211
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber(body, *scrubber_args)` with `extra_scrub` set to `extra`.

  When `scrubber_args` is non-empty it is deep-copied first and the copy's
  first element gets its `extra_scrub` attribute set, so the caller's
  arguments are never modified. An empty `scrubber_args` is passed through
  as is.
  """
  call_args = scrubber_args
  if call_args:
    call_args = copy.deepcopy(call_args)
    call_args[0].extra_scrub = extra
  return scrubber(body, *call_args)
218
219# Build up a dictionary of all the function bodies.
220class function_body(object):
221  def __init__(self, string, extra, args_and_sig):
222    self.scrub = string
223    self.extrascrub = extra
224    self.args_and_sig = args_and_sig
225  def is_same_except_arg_names(self, extrascrub, args_and_sig):
226    arg_names = set()
227    def drop_arg_names(match):
228        arg_names.add(match.group(2))
229        return match.group(1) + match.group(3)
230    def repl_arg_names(match):
231        if match.group(2) in arg_names:
232            return match.group(1) + match.group(3)
233        return match.group(1) + match.group(2) + match.group(3)
234    ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
235    ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
236    if ans0 != ans1:
237        return False
238    es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
239    es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
240    es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
241    es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
242    return es0 == es1
243
244  def __str__(self):
245    return self.scrub
246
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
  """Record the scrubbed body of every function found in `raw_tool_output`.

  Args:
    function_re: compiled regex with named groups 'func' and 'body', and
      optionally 'args_and_sig' and 'analysis'.
    scrubber, scrubber_args: passed to do_scrub() to clean each body.
    raw_tool_output: text searched with `function_re`.
    prefixes: check prefixes of the run line that produced the output.
    func_dict: mapping prefix -> {function name -> function_body or None};
      updated in place.
    verbose: when true, print each function and its scrubbed body to stderr.
    record_args: when true (and the regex has an 'args_and_sig' group) the
      scrubbed signature is recorded; otherwise '(' is recorded when the
      group exists and '' when it does not.
  """
  for m in function_re.finditer(raw_tool_output):
    named_groups = m.groupdict()
    func = m.group('func')
    body = m.group('body')
    # Determine if we print arguments, the opening brace, or nothing after the function name
    if 'args_and_sig' not in named_groups:
      args_and_sig = ''
    elif record_args:
      args_and_sig = scrub_body(m.group('args_and_sig').strip())
    else:
      args_and_sig = '('
    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra=False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra=True)
    if 'analysis' in named_groups:
      analysis = m.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for body_line in scrubbed_body.splitlines():
        print('  ' + body_line, file=sys.stderr)
    for prefix in prefixes:
      recorded = func_dict[prefix]
      if func in recorded:
        previous = recorded[func]
        conflicts = (str(previous) != scrubbed_body or
                     (previous and previous.args_and_sig != args_and_sig))
        if conflicts:
          if previous and previous.is_same_except_arg_names(scrubbed_extra, args_and_sig):
            # Only the argument names differ: keep the entry but switch it
            # to the extra-scrubbed text and the new signature.
            previous.scrub = scrubbed_extra
            previous.args_and_sig = args_and_sig
            continue
          if prefix != prefixes[-1]:
            # Mark the prefix as unusable for this function.
            recorded[func] = None
            continue
          # For the last prefix only warn; the entry is overwritten below.
          warn('Found conflicting asm under the same prefix: %r!' % (prefix,))

      recorded[func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)
287
##### Generator of LLVM IR CHECK lines

# Matches an IR comment: optional whitespace, a ';' and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are followed by
# spaces, commas, parens, or the end of the string.
# Groups: (1) the leading whitespace, (2) the name without its '%',
# (3) the delimiter that follows the name.
IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)')

# Prefix prepended by get_value_name() to IR names that consist only of digits.
NAMELESS_PREFIX = "TMP"
297
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Return the upper-cased FileCheck variable name for the IR name `var`.

  All-digit names get NAMELESS_PREFIX prepended; '.' and '-' are replaced by
  '_'.
  """
  name = NAMELESS_PREFIX + var if var.isdigit() else var
  return name.replace('.', '_').replace('-', '_').upper()
305
306
# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the defining form '[[<NAME>:%.*]]' for the IR name `var`."""
  return '[[{}:%.*]]'.format(get_value_name(var))
310
311
# Use a FileCheck variable.
def get_value_use(var):
  """Return the referencing form '[[<NAME>]]' for the IR name `var`."""
  return '[[{}]]'.format(get_value_name(var))
315
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
  """Rewrite `lines` in place so that IR value names become FileCheck variables.

  Args:
    lines: list of strings; each element is replaced by its rewritten form.
    is_analyze: when true only IR comments are stripped and the value names
      are left untouched.
    vars_seen: set of IR names already defined; names met for the first time
      are added to it.

  Returns:
    The same `lines` list that was passed in.
  """
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = match.group(2)
    if NAMELESS_PREFIX.lower() in var.lower():
      warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,))
    if var in vars_seen:
      rv = get_value_use(var)
    else:
      vars_seen.add(var)
      rv = get_value_definition(var)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(3)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    if is_analyze:
      lines[i] = scrubbed_line
    else:
      lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
  return lines
348
349
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for `func_name` to `output_lines`.

  Args:
    output_lines: list of strings that is appended to in place.
    comment_marker: comment leader placed at the start of each emitted line.
    prefix_list: sequence of run-line entries; element 0 of each entry is
      that run line's list of check prefixes.
    func_dict: mapping prefix -> {function name -> recorded body, or a falsy
      value when nothing can be emitted for that prefix}.
    func_name: name of the function to emit checks for.
    check_label_format: %-style format taking (prefix, func_name, signature).
    is_asm: when true the body lines are emitted as they are; otherwise they
      are passed through genericize_check_lines() first.
    is_analyze: forwarded to genericize_check_lines().
  """
  # A prefix cannot be used to print the function when some run line using it
  # did not produce the function for all of its prefixes. A subset of those
  # prefixes might still know the function, but only because other run lines
  # created it.
  prefix_exclusions = set()
  for run_entry in prefix_list:
    run_prefixes = run_entry[0]
    if any(func_name not in func_dict[name] for name in run_prefixes):
      prefix_exclusions.update(run_prefixes)

  # prefix_exclusions is complete, so the output can be emitted now. At most
  # one prefix is printed per run line.
  printed_prefixes = []
  for run_entry in prefix_list:
    for checkprefix in run_entry[0]:
      if checkprefix in printed_prefixes:
        break

      # Excluded prefixes are skipped.
      if checkprefix in prefix_exclusions:
        continue

      # So are prefixes without any recorded output.
      recorded = func_dict[checkprefix][func_name]
      if not recorded:
        continue

      # Add some space between different check prefixes, but not after the
      # last check line (before the test code).
      if is_asm and printed_prefixes:
        output_lines.append(comment_marker)

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      signature = genericize_check_lines([str(recorded.args_and_sig)],
                                         is_analyze, vars_seen)[0]
      if '[[' in signature:
        # The signature defines FileCheck variables, so it goes on a line of
        # its own after the label.
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, signature))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, signature))
      body_lines = str(recorded).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, body_lines[0]))
        for asm_line in body_lines[1:]:
          if asm_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, asm_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      body_lines = genericize_check_lines(body_lines, is_analyze, vars_seen)

      # Blank lines are not checked themselves; the line that follows one is
      # emitted as a plain check instead of a -NEXT check.
      after_blank = False
      for ir_line in body_lines:
        if ir_line.strip() == '':
          after_blank = True
          continue
        # Do not waste time checking IR comments.
        ir_line = SCRUB_IR_COMMENT_RE.sub(r'', ir_line)
        line_format = '{} {}:       {}' if after_blank else '{} {}-NEXT:  {}'
        output_lines.append(line_format.format(comment_marker, checkprefix,
                                               ir_line))
        after_blank = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      break
442
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
  """Append IR check lines for `func_name` via add_checks().

  The label has the form '<marker> <prefix>-LABEL: @<name><signature>'; when
  `function_sig` is true, 'define {{[^@]+}}' is placed in front of the '@'.
  `preserve_names` is handed to add_checks() as its `is_analyze` argument.
  """
  # Label format is based on IR string.
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  else:
    function_def_regex = ''
  check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker,
                                                     function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=preserve_names)
450
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Append analysis check lines for `func_name` via add_checks().

  The label has the form "<marker> <prefix>-LABEL: '<name><signature>'".
  """
  check_label_format = "{} %s-LABEL: '%s%s'".format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=True)
454
455
def check_prefix(prefix):
  """Warn when `prefix` is not a valid check prefix.

  A prefix is valid when it matches PREFIX_RE. For an invalid prefix that
  contains a comma the warning also suggests '--check-prefixes='.
  """
  if PREFIX_RE.match(prefix):
    return
  # Format the message before appending the hint: the hint repeats the
  # user-supplied prefix, which must never become part of a format string
  # (a '%' in it would otherwise make the formatting itself fail).
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only "
             "alphanumeric characters, hyphens and underscores." % (prefix,))
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
463
464
def verify_filecheck_prefixes(fc_cmd):
  """Validate the prefixes given with '=' in the command string `fc_cmd`.

  Each whitespace-separated word containing 'check-prefix=' has its value
  checked with check_prefix(). For 'check-prefixes=' the value is split on
  commas, every entry is checked, and a warning is issued for each entry that
  occurs more than once in the list.
  """
  for word in fc_cmd.split():
    if "check-prefix=" in word:
      check_prefix(word.split('=', 1)[1])
      continue
    if "check-prefixes=" not in word:
      continue
    names = word.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
477
478
def get_autogennote_suffix(parser, args):
  """Build the ' UTC_ARGS: ...' suffix describing the non-default options.

  Args:
    parser: argparse parser whose registered actions are inspected.
    args: parsed arguments holding the current option values.

  Returns:
    '' when every considered option has its default value, otherwise
    ' <UTC_ARGS_KEY> ' followed by the space-separated options (and their
    values for options that take a parameter).
  """
  # Parameters such as paths to the binary or the list of tests are ignored.
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  pieces = []
  for action in parser._actions:
    dest = action.dest
    # Options such as --help that aren't included in args are ignored too.
    if not hasattr(args, dest) or dest in ignored_dests:
      continue
    value = getattr(args, dest)
    takes_parameter = action.const is None
    # An action that stores a constant (usually True/False) is only recorded
    # when the current value equals that constant; this picks the right one
    # of paired boolean options (--foo/--no-foo).
    if not takes_parameter and value != action.const:
      continue
    if parser.get_default(dest) == value:
      continue  # Don't add default values
    pieces.append(action.option_strings[0])
    if takes_parameter:
      pieces.append('%s' % value)
  if not pieces:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(pieces))
502
503
def check_for_command(line, parser, args, argv):
  """Apply the options of a 'UTC_ARGS:' directive found in `line`, if any.

  Args:
    line: a line of the test file.
    parser: argparse parser used to re-parse the extended argument list.
    args: the currently active parsed arguments; `args.tests` is read.
    argv: the argument list accumulated so far; it is not modified.

  Returns:
    An `(args, argv)` pair. Without a directive the inputs are returned
    unchanged. Otherwise `argv` is a new list with the directive's options
    appended and `args` is the result of parsing that list with every entry
    of the old `args.tests` left out.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if not cmd_m:
    return args, argv
  # split() without a separator collapses runs of whitespace, so repeated
  # spaces or an empty directive never produce empty-string arguments that
  # would reach argparse as bogus extra arguments.
  argv = argv + cmd_m.group('cmd').split()
  known_tests = args.tests
  args = parser.parse_args([arg for arg in argv if arg not in known_tests])
  return args, argv
511