xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 937bad3594e7cba44cddae4661e1bae911beba3e)
1from __future__ import print_function
2
3import copy
4import glob
5import re
6import subprocess
7import sys
8
# On Python 3, define a minimal `string` namespace whose expandtabs is
# str.expandtabs, so callers can write string.expandtabs(text, n) on either
# major version. On Python 2 the real `string` module is imported instead.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
14
15##### Common utilities for update_*test_checks.py
16
17
# Module-wide verbosity flag: assigned by parse_commandline_args() from the
# --verbose option and read by debug() to decide whether to print.
_verbose = False
19
def parse_commandline_args(parser):
  """Register the options shared by all update scripts and parse the command line.

  Adds the common flags to `parser`, parses the process arguments with
  parser.parse_args(), stores the --verbose setting in the module-level
  `_verbose` flag and returns the parsed namespace.
  """
  global _verbose
  # (option names, add_argument keyword arguments), in registration order.
  common_options = (
      (('-v', '--verbose'),
       dict(action='store_true',
            help='Show verbose output')),
      (('-u', '--update-only'),
       dict(action='store_true',
            help='Only update test if it was already autogened')),
      (('--force-update',),
       dict(action='store_true',
            help='Update test even if it was autogened by a different script')),
      (('--enable',),
       dict(action='store_true', dest='enabled', default=True,
            help='Activate CHECK line generation from this point forward')),
      (('--disable',),
       dict(action='store_false', dest='enabled',
            help='Deactivate CHECK line generation from this point forward')),
  )
  for names, options in common_options:
    parser.add_argument(*names, **options)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
35
36
class InputLineInfo(object):
  """One input line of a test, with the options in effect when it was read."""

  def __init__(self, line, line_number, args, argv):
    # The line text and its index within the test's input lines.
    self.line, self.line_number = line, line_number
    # The parsed options and the argument vector associated with this line.
    self.args, self.argv = args, argv
43
44
class TestInfo(object):
  """State for one test file: its lines, RUN lines, options and autogen note."""

  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix):
    self.parser = parser
    self.path = test
    self.args = args
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    # Without an explicit comment prefix, '.mir' files use '#', the rest ';'.
    if comment_prefix is None:
      comment_prefix = '#' if self.path.endswith('.mir') else ';'
    self.comment_prefix = comment_prefix
    self.autogenerated_note_prefix = ' '.join((self.comment_prefix, UTC_ADVERT))
    note = self.autogenerated_note_prefix + script_name
    self.test_autogenerated_note = note + get_autogennote_suffix(parser,
                                                                 self.args)

  def iterlines(self, output_lines):
    """Yield an InputLineInfo for each input line that should be processed.

    The autogenerated note is appended to output_lines first. Lines starting
    with the note prefix are dropped. Every other line is passed to
    check_for_command(), which may update self.args/self.argv; while
    self.args.enabled is false the line is copied to output_lines unchanged
    instead of being yielded.
    """
    output_lines.append(self.test_autogenerated_note)
    for number, text in enumerate(self.input_lines):
      # Discard any previous script advertising.
      if text.startswith(self.autogenerated_note_prefix):
        continue
      updated = check_for_command(text, self.parser, self.args, self.argv)
      self.args, self.argv = updated
      if self.args.enabled:
        yield InputLineInfo(text, number, self.args, self.argv)
      else:
        output_lines.append(text)
76
77
def itertests(test_patterns, parser, script_name, comment_prefix=None):
  """Yield a TestInfo for every test file matched by test_patterns.

  Each pattern is expanded with glob; patterns without matches only produce a
  warning. A test whose first line carries the autogeneration note of another
  script is skipped unless --force-update is given, and a test without any
  note is skipped when --update-only is given.
  """
  for pattern in test_patterns:
    # On Windows we must expand the patterns ourselves.
    matched_paths = glob.glob(pattern)
    if not matched_paths:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for path in matched_paths:
      with open(path) as handle:
        input_lines = [raw.rstrip() for raw in handle]
      args = parser.parse_args()
      argv = sys.argv[:]
      first_line = input_lines[0] if input_lines else ""
      if UTC_ADVERT not in first_line:
        if args.update_only:
          warn("Skipping test which isn't autogenerated: " + path)
          continue
      elif script_name in first_line or args.force_update:
        # Pick up any UTC_ARGS recorded in the existing note line.
        args, argv = check_for_command(first_line, parser, args, argv)
      else:
        warn("Skipping test which wasn't autogenerated by " + script_name, path)
        continue
      yield TestInfo(path, parser, script_name, input_lines, args, argv,
                     comment_prefix)
102
103
def should_add_line_to_output(input_line, prefix_set):
  """Return False for lines that must not be copied to the output.

  Those are comment lines consisting of a lone ';' and existing check lines
  whose prefix is in prefix_set (they get regenerated). Blank lines are
  deliberately kept.
  """
  if input_line.strip() == ';':
    return False
  check_match = CHECK_RE.match(input_line)
  is_own_check_line = bool(check_match) and check_match.group(1) in prefix_set
  return not is_own_check_line
117
# Run the tool under test, feeding it the IR file on standard input.
def invoke_tool(exe, cmd_args, ir):
  """Return the tool's standard output as text with '\\r\\n' turned into '\\n'.

  cmd_args may be a list (executed directly) or a string (appended to exe and
  executed through the shell). subprocess.check_output raises if the tool
  exits with a non-zero status.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    use_shell = not isinstance(cmd_args, list)
    command = exe + ' ' + cmd_args if use_shell else [exe] + cmd_args
    output = subprocess.check_output(command, shell=use_shell, stdin=ir_file)
    if sys.version_info[0] > 2:
      output = output.decode()
  # Normalize Windows line endings to plain newlines.
  return output.replace('\r\n', '\n')
133
##### LLVM IR parser
# A comment line (';', '#' or '//') containing 'RUN:'; group 1 is the text
# that follows 'RUN:'.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A -check-prefix/--check-prefix(es) option followed by '=' or a space;
# group 1 is the option value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A well-formed prefix: letters, digits, underscores and hyphens only.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# A comment line shaped like a check directive; group 1 is the prefix with a
# trailing -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY (if any) left out.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')
139
# Marker that introduces script arguments embedded in a test file.
UTC_ARGS_KEY = 'UTC_ARGS:'
# Matches a line containing UTC_ARGS_KEY; the 'cmd' group is the text after it.
# Both pattern pieces are raw strings so that '\s' reaches the regex engine
# without relying on Python passing unknown string escapes through.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
# Text that starts the note identifying the script that generated a test.
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '
143
# Matches one IR function definition in tool output. Named groups: 'attrs'
# (text of an optional preceding '; Function Attrs:' comment), 'func' (the
# function name), 'args_and_sig' (the parenthesised arguments plus whatever
# follows up to the opening brace) and 'body' (the text between the braces).
OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Matches a "'<analysis>' for function '<func>':" header and the text after
# it. Named groups: 'analysis', 'func' and 'body'.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of a 'define' line; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
# A 'target triple = "..."' line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march options; group 1 is the option value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
158
# Whitespace at the very start of the text (group 1).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs, except that a match never begins at the start of a
# line; scrub_body() replaces each match with a single space.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias used by scrub_body() for stripping trailing whitespace.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Like the trailing-whitespace pattern, but also covers '#<digits>' tokens at
# the end of a line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A '# kill:' comment line at the start of the text, including its newline.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# '# =>This Inner Loop Header:' and '# in Loop:' comments up to end of line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
# A lone '#' (with optional preceding spaces/tabs) that ends a line and
# follows a non-whitespace character.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)
168
169
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr, appending ': <test_file>' when given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
174
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr, appending ': <test_file>' when given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
179
def debug(*args, **kwargs):
  """Forward the arguments to print() when verbose mode is on.

  Output goes to stderr unless the caller passes its own `file`.
  """
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  if not _verbose:
    return
  print(*args, **kwargs)
186
def find_run_lines(test, lines):
  """Return the RUN commands found in lines, with continuations joined.

  A RUN line ending in a backslash is merged with the next RUN line: the
  trailing backslashes are removed and the two parts are joined by a space.
  `test` is only used in debug output.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for candidate in lines:
    found = RUN_LINE_RE.match(candidate)
    if not found:
      continue
    command = found.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + command
    else:
      run_lines.append(command)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for command in run_lines:
    debug('  RUN: {}'.format(command))
  return run_lines
201
def scrub_body(body):
  """Normalize whitespace in a function body and return the result.

  Runs of spaces/tabs are collapsed to one space (whitespace at the start of
  a line is left in place), tabs are expanded with a tab size of 2, and
  trailing whitespace is removed from every line.
  """
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  detabbed = string.expandtabs(collapsed, 2)
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', detabbed)
211
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call scrubber(body, *scrubber_args) with extra_scrub set to `extra`.

  When scrubber_args is non-empty, a deep copy is made and extra_scrub is set
  on the copy's first element, so the caller's arguments are left untouched.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
218
219# Build up a dictionary of all the function bodies.
220class function_body(object):
221  def __init__(self, string, extra, args_and_sig, attrs):
222    self.scrub = string
223    self.extrascrub = extra
224    self.args_and_sig = args_and_sig
225    self.attrs = attrs
226  def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs):
227    arg_names = set()
228    def drop_arg_names(match):
229        arg_names.add(match.group(2))
230        return match.group(1) + match.group(3)
231    def repl_arg_names(match):
232        if match.group(2) in arg_names:
233            return match.group(1) + match.group(3)
234        return match.group(1) + match.group(2) + match.group(3)
235    if self.attrs != attrs:
236      return False
237    ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
238    ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
239    if ans0 != ans1:
240        return False
241    es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
242    es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
243    es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
244    es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
245    return es0 == es1
246
247  def __str__(self):
248    return self.scrub
249
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args, check_attributes):
  """Record the scrubbed body of every function found in raw_tool_output.

  Each match of function_re must provide the named groups 'func' and 'body'
  ('attrs' is read when check_attributes is true; 'args_and_sig' and
  'analysis' are used when present). The body is scrubbed twice through
  do_scrub() (extra=False and extra=True) and stored as a function_body in
  func_dict[prefix][func] for every prefix in prefixes.

  If an entry already exists and differs from the new one:
    * when it is the same except for argument names, its scrub and
      args_and_sig are updated in place;
    * otherwise the entry is set to None, except for the last prefix in
      prefixes, where a warning is printed and the entry is overwritten.
  """
  for m in function_re.finditer(raw_tool_output):
    if not m:
      continue
    func = m.group('func')
    body = m.group('body')
    attrs = m.group('attrs') if check_attributes else ''
    # Determine if we print arguments, the opening brace, or nothing after the function name
    if record_args and 'args_and_sig' in m.groupdict():
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
    elif 'args_and_sig' in m.groupdict():
        args_and_sig = '('
    else:
        args_and_sig = ''
    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
    if 'analysis' in m.groupdict():
      analysis = m.group('analysis')
      # Only a warning: processing continues for other analyses.
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for l in scrubbed_body.splitlines():
        print('  ' + l, file=sys.stderr)
    for prefix in prefixes:
      if func in func_dict[prefix]:
        # The existing entry conflicts if its text differs, or if it is not
        # None and its signature or attributes differ.
        if str(func_dict[prefix][func]) != scrubbed_body or (func_dict[prefix][func] and (func_dict[prefix][func].args_and_sig != args_and_sig or func_dict[prefix][func].attrs != attrs)):
          if func_dict[prefix][func] and func_dict[prefix][func].is_same_except_arg_names(scrubbed_extra, args_and_sig, attrs):
            # Only argument names differ: keep the entry, switching it to the
            # extra-scrubbed text and the new signature.
            func_dict[prefix][func].scrub = scrubbed_extra
            func_dict[prefix][func].args_and_sig = args_and_sig
            continue
          else:
            if prefix == prefixes[-1]:
              # Falls through to the assignment below, replacing the entry.
              warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
            else:
              # Mark this prefix as unusable for the function.
              func_dict[prefix][func] = None
              continue

      func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig, attrs)
292
##### Generator of LLVM IR CHECK lines

# An IR comment: optional whitespace, a ';' and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are followed by
# spaces, commas, parens, or the end of the string. Group 1 is the leading
# whitespace, group 2 the name after '%', group 3 the following character.
IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)')

# Prepended by get_value_name() to IR value names that consist only of digits.
NAMELESS_PREFIX = "TMP"
302
# Derive a FileCheck variable name from an IR value name.
def get_value_name(var):
  """Return var upper-cased with '.' and '-' replaced by '_'.

  Names made only of digits get NAMELESS_PREFIX prepended first.
  """
  prefix = NAMELESS_PREFIX if var.isdigit() else ''
  return (prefix + var).replace('.', '_').replace('-', '_').upper()
310
311
# Build the FileCheck text that defines a variable for an IR value.
def get_value_definition(var):
  """Return '[[NAME:%.*]]' where NAME is get_value_name(var)."""
  return '[[{}:%.*]]'.format(get_value_name(var))
315
316
# Build the FileCheck text that refers to an already defined variable.
def get_value_use(var):
  """Return '[[NAME]]' where NAME is get_value_name(var)."""
  return '[[{}]]'.format(get_value_name(var))
320
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
  """Rewrite lines in place and return the same list.

  Every line has '%.' replaced by '%dot' and IR comments removed. Unless
  is_analyze is true, each IR value name is then replaced by a FileCheck
  variable: a definition the first time the name is met (it is added to
  vars_seen) and a use afterwards.
  """
  def to_filecheck_variable(match):
    # Called for each IR value found in a line. The whole match is replaced
    # by the returned text, so the surrounding whitespace and delimiter are
    # handed back together with the variable.
    before, var, after = match.group(1), match.group(2), match.group(3)
    if NAMELESS_PREFIX.lower() in var.lower():
      warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,))
    if var in vars_seen:
      replacement = get_value_use(var)
    else:
      vars_seen.add(var)
      replacement = get_value_definition(var)
    return before + replacement + after

  for index, original in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    text = original.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    text = SCRUB_IR_COMMENT_RE.sub(r'', text)
    if not is_analyze:
      text = IR_VALUE_RE.sub(to_filecheck_variable, text)
    lines[index] = text
  return lines
353
354
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for func_name to output_lines.

  For each entry p of prefix_list, p[0] is read as the list of check prefixes
  of one run line. func_dict[prefix][func_name] supplies the recorded
  function (a falsy entry means there is no usable output for that prefix).
  check_label_format is a %-format string taking (prefix, function name,
  arguments/signature text). With is_asm the body lines are emitted verbatim;
  otherwise they first go through genericize_check_lines(), to which
  is_analyze is forwarded. At most one prefix per run line is emitted, and no
  prefix is emitted twice.
  """
  # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_exclusions = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
        prefix_exclusions |= set(checkprefixes)
        continue

  # prefix_exclusions is constructed, we can now emit the output
  for p in prefix_list:
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      # An already printed prefix ends the handling of this run line.
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is excluded.
      if checkprefix in prefix_exclusions:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      # FileCheck variable names are tracked per emitted prefix.
      vars_seen = set()
      printed_prefixes.append(checkprefix)
      # str() turns a missing attribute list (None) into 'None', which is
      # mapped back to the empty string here.
      attrs = str(func_dict[checkprefix][func_name].attrs)
      attrs = '' if attrs == 'None' else attrs
      if attrs:
        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
      # If the signature contains FileCheck variables it goes on a separate
      # -SAME line; otherwise it is part of the label line.
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      # True when the previous body line was blank; the next emitted line
      # then uses a plain check instead of -NEXT.
      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT:  {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      break
451
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
  """Emit IR check lines for func_name through add_checks().

  The label line has the form '<marker> <prefix>-LABEL: @<name>...', preceded
  by a 'define {{[^@]+}}' pattern when function_sig is true. preserve_names
  is passed to add_checks() as its is_analyze argument.
  """
  # Label format is based on IR string.
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  else:
    function_def_regex = ''
  check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker,
                                                    function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=preserve_names)
459
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Emit analysis check lines for func_name through add_checks().

  The label line quotes the function name; add_checks() is called with
  is_asm=False and is_analyze=True.
  """
  check_label_format = "{} %s-LABEL: '%s%s'".format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=True)
463
464
def check_prefix(prefix):
  """Warn if prefix is not a valid FileCheck prefix.

  A valid prefix matches PREFIX_RE (letters, digits, hyphens, underscores).
  When the invalid prefix contains a comma, the warning also suggests the
  --check-prefixes option.
  """
  if PREFIX_RE.match(prefix):
    return
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  if ',' in prefix:
    # The hint embeds the user-supplied prefix, so it is appended after the
    # %-formatting above; a '%' in the prefix must not be read as a format
    # directive.
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
472
473
def verify_filecheck_prefixes(fc_cmd):
  """Validate the prefixes given in a FileCheck command line.

  Every whitespace-separated token containing 'check-prefix=' or
  'check-prefixes=' has its value(s) passed to check_prefix(); a prefix
  occurring more than once in a check-prefixes list also triggers a warning.
  """
  for token in fc_cmd.split():
    if "check-prefix=" in token:
      check_prefix(token.split('=', 1)[1])
      continue
    if "check-prefixes=" not in token:
      continue
    names = token.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
486
487
def get_autogennote_suffix(parser, args):
  """Return the ' UTC_ARGS: ...' text to append to the autogenerated note.

  Lists every option of parser whose value in args differs from its default,
  except options naming tests, binaries or verbosity. Returns '' when there
  is nothing to record.
  """
  # Parameters such as paths to the binary or the list of tests are ignored.
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  pieces = []
  for action in parser._actions:
    dest = action.dest
    # Options such as --help are not present in args at all.
    if not hasattr(args, dest) or dest in ignored_dests:
      continue
    value = getattr(args, dest)
    # An action without a constant takes a parameter.
    takes_parameter = action.const is None
    # For boolean --foo/--no-foo pairs, only the action whose constant equals
    # the current value is considered.
    if not takes_parameter and value != action.const:
      continue
    # Don't add default values.
    if parser.get_default(dest) == value:
      continue
    pieces.append(action.option_strings[0])
    if takes_parameter:
      pieces.append('%s' % value)
  if not pieces:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(pieces))
511
512
def check_for_command(line, parser, args, argv):
  """Apply the UTC_ARGS found on line, if any, and return (args, argv).

  When line contains a UTC_ARGS command, its whitespace-separated options are
  appended to a copy of argv and the result (without the entries listed in
  args.tests) is re-parsed with parser. Otherwise args and argv are returned
  unchanged.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if cmd_m:
    # split() without a separator ignores repeated spaces and yields no
    # tokens for an empty command; split(' ') would produce empty strings
    # that argparse treats as positional arguments.
    cmd = cmd_m.group('cmd').split()
    argv = argv + cmd
    known_tests = args.tests
    args = parser.parse_args([arg for arg in argv if arg not in known_tests])
  return args, argv
520