xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision e5b8772756737e41cb1e8ee1a5a33cb3d8a25be6)
1from __future__ import print_function
2import re
3import string
4import subprocess
5import sys
6import copy
7
# Compatibility shim: on Python 3 the name `string` is rebound to a minimal
# stand-in class exposing only `expandtabs` (backed by `str.expandtabs`), so
# the rest of the file can keep calling `string.expandtabs(text, tabsize)`.
# On Python 2 the real `string` module is imported instead.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
13
14##### Common utilities for update_*test_checks.py
15
16
# Module-wide verbosity switch: assigned by parse_commandline_args() from the
# -v/--verbose flag and read by debug().
_verbose = False
18
def parse_commandline_args(parser):
  """Register the shared options on `parser` and parse the command line.

  Adds -v/--verbose and -u/--update-only to `parser`, parses the process
  arguments with it and stores the verbosity choice in the module-level
  `_verbose` flag.

  Returns the namespace produced by `parser.parse_args()`.
  """
  global _verbose
  shared_flags = (
      (('-v', '--verbose'), 'Show verbose output'),
      (('-u', '--update-only'), 'Only update test if it was already autogened'),
  )
  for names, description in shared_flags:
    parser.add_argument(*names, action='store_true', help=description)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
28
def should_add_line_to_output(input_line, prefix_set):
  """Decide whether a line of the original test is carried over to the output.

  Returns False for a line consisting only of a `;` comment marker and for an
  existing check line whose prefix is in `prefix_set` (those lines are
  regenerated). Returns True for everything else, blank lines included.
  """
  if input_line.strip() == ';':
    return False
  check = CHECK_RE.match(input_line)
  is_stale_check = bool(check) and check.group(1) in prefix_set
  return not is_stale_check
42
43# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir):
  """Run `exe` with the file `ir` on its standard input and return its stdout.

  `cmd_args` may be a list, in which case the command is executed directly as
  `[exe] + cmd_args`, or a string, in which case `exe + ' ' + cmd_args` is run
  through the shell.

  The output is decoded to text on Python 3 and Windows line endings (CRLF)
  are normalized to LF. Failures from opening `ir` or from
  `subprocess.check_output` propagate to the caller.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    use_shell = not isinstance(cmd_args, list)
    command = exe + ' ' + cmd_args if use_shell else [exe] + cmd_args
    raw_output = subprocess.check_output(command, shell=use_shell,
                                         stdin=ir_file)
  if sys.version_info[0] > 2:
    raw_output = raw_output.decode()
  return raw_output.replace('\r\n', '\n')
58
59##### LLVM IR parser
60
# Matches a RUN line introduced by a `//`, `;` or `#` comment marker; group 1
# is the text following `RUN:`.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# Matches a `-check-prefix` / `--check-prefix` / `-check-prefixes` /
# `--check-prefixes` option followed by `=` or a space; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A well-formed check prefix: letters, digits, underscores and hyphens only.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# Matches a check line behind a `//`, `;` or `#` comment marker; group 1 is
# the prefix with any -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix removed.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')
65
# Marker that introduces script arguments recorded inside a test file.
UTC_ARGS_KEY = 'UTC_ARGS:'
# Matches a line containing UTC_ARGS_KEY; the named group `cmd` captures the
# text after the marker. Every pattern piece is a raw string so that `\s`
# reaches the regex engine instead of being an invalid string escape.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
68
# Matches a function definition in IR text, from `define ... @name` through
# the closing `}` at the start of a line. Named groups: `func` (the name),
# `args_and_sig` (the parenthesized argument list plus everything up to the
# opening `{`) and `body` (the text between the braces).
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Matches a section headed `'<analysis>' for function '<func>':`. Named
# groups: `analysis`, `func` and `body` (everything after the header line up
# to the end of the input).
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# Matches the start of an IR function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
# Matches a `target triple = "..."` line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# Match `-mtriple` / `-march` followed by `=` or a space; group 1 is the value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Whitespace at the very start of the string.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs that do not begin at the start of a line.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias of SCRUB_TRAILING_WHITESPACE_RE; this is the name scrub_body() uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Spaces/tabs and `#<digits>` tokens at the end of any line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A `# kill:` comment line including its newline. re.M is not set, so `^`
# anchors only at the start of the string.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# `# =>This Inner Loop Header:` and `# in Loop:` comments up to end of line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
# A `#` at the end of a line, together with the spaces/tabs before it, when
# preceded by a non-whitespace character.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)
93
94
def error(msg, test_file=None):
  """Print `msg` to stderr with an 'ERROR: ' prefix.

  When `test_file` is truthy it is appended to the message after ': '.
  """
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
99
def warn(msg, test_file=None):
  """Print `msg` to stderr with a 'WARNING: ' prefix.

  When `test_file` is truthy it is appended to the message after ': '.
  """
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
104
def debug(*args, **kwargs):
  """Forward the arguments to print() when verbose output is enabled.

  Output goes to stderr unless the caller supplies its own `file` keyword.
  Nothing is printed while the module-level `_verbose` flag is false.
  """
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  if not _verbose:
    return
  print(*args, **kwargs)
111
def find_run_lines(test, lines):
  """Collect the RUN commands found in `lines`.

  Each line matching RUN_LINE_RE contributes the text after `RUN:`. When the
  previously collected command ends in a backslash, the trailing backslashes
  are removed and the new text is appended to it after a single space;
  otherwise the text starts a new command.

  `test` is only used in the debug output. Returns the list of commands.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for line in lines:
    found = RUN_LINE_RE.match(line)
    if not found:
      continue
    text = found.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      # Continuation of the previous RUN line.
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + text
    else:
      run_lines.append(text)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for command in run_lines:
    debug('  RUN: {}'.format(command))
  return run_lines
126
def scrub_body(body):
  """Normalize the whitespace of `body` and return the result.

  Three steps are applied in order: runs of spaces/tabs matched by
  SCRUB_WHITESPACE_RE become a single space (leading whitespace is left in
  place), remaining tabs are expanded with a tab size of 2, and trailing
  whitespace is removed from every line.
  """
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  detabbed = string.expandtabs(collapsed, 2)
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', detabbed)
136
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber(body, *scrubber_args)` with `extra_scrub` set to `extra`.

  When `scrubber_args` is non-empty, a deep copy is made so the caller's
  objects are not modified, and `extra_scrub` is set on the first copied
  argument before the call. An empty `scrubber_args` is passed through as-is.

  Returns whatever `scrubber` returns.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  private_args = copy.deepcopy(scrubber_args)
  private_args[0].extra_scrub = extra
  return scrubber(body, *private_args)
143
144# Build up a dictionary of all the function bodies.
145class function_body(object):
146  def __init__(self, string, extra, args_and_sig):
147    self.scrub = string
148    self.extrascrub = extra
149    self.args_and_sig = args_and_sig
150  def is_same_except_arg_names(self, extrascrub, args_and_sig):
151    arg_names = set()
152    def drop_arg_names(match):
153        arg_names.add(match.group(2))
154        return match.group(1) + match.group(3)
155    def repl_arg_names(match):
156        if match.group(2) in arg_names:
157            return match.group(1) + match.group(3)
158        return match.group(1) + match.group(2) + match.group(3)
159    ans0 = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
160    ans1 = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
161    if ans0 != ans1:
162        return False
163    es0 = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
164    es1 = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
165    es0 = SCRUB_IR_COMMENT_RE.sub(r'', es0)
166    es1 = SCRUB_IR_COMMENT_RE.sub(r'', es1)
167    return es0 == es1
168
169  def __str__(self):
170    return self.scrub
171
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
  """Record the scrubbed body of every function found in `raw_tool_output`.

  Each match of `function_re` must provide the named groups `func` and
  `body`; `args_and_sig` and `analysis` are used when the pattern defines
  them. The body is scrubbed twice through do_scrub(), once without and once
  with extra scrubbing, and the result is stored as a function_body in
  `func_dict[prefix][func]` for every prefix in `prefixes`.

  When an entry already exists and differs from the new output:
    * if the two differ only in argument names, the existing entry is
      switched to the extra-scrubbed text and the new signature;
    * otherwise the entry is set to None, except for the last prefix in
      `prefixes`, where a warning is issued and the entry is overwritten.

  `verbose` echoes each processed function to stderr. `record_args` selects
  whether the scrubbed argument list or just '(' is stored as the signature.
  """
  for match in function_re.finditer(raw_tool_output):
    named = match.groupdict()
    func = match.group('func')
    body = match.group('body')
    # Determine if we print arguments, the opening brace, or nothing after the function name
    if 'args_and_sig' not in named:
      args_and_sig = ''
    elif record_args:
      args_and_sig = scrub_body(match.group('args_and_sig').strip())
    else:
      args_and_sig = '('
    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
    if 'analysis' in named:
      analysis = match.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for body_line in scrubbed_body.splitlines():
        print('  ' + body_line, file=sys.stderr)
    for prefix in prefixes:
      recorded = func_dict[prefix]
      if func in recorded:
        existing = recorded[func]
        conflicts = (str(existing) != scrubbed_body or
                     (existing and existing.args_and_sig != args_and_sig))
        if conflicts:
          if existing and existing.is_same_except_arg_names(scrubbed_extra, args_and_sig):
            existing.scrub = scrubbed_extra
            existing.args_and_sig = args_and_sig
            continue
          if prefix != prefixes[-1]:
            # Mark the entry as conflicting for this prefix.
            recorded[func] = None
            continue
          warn('Found conflicting asm under the same prefix: %r!' % (prefix,))

      recorded[func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)
212
213##### Generator of LLVM IR CHECK lines
214
# Matches an IR comment: optional whitespace, a `;`, and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are preceded by
# whitespace and followed by a comma, whitespace, a paren, or the end of the
# string. Group 1 is the leading whitespace, group 2 the name without its
# `%`, and group 3 the trailing delimiter.
IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)')

# Prefix prepended to all-digit IR value names by get_value_name().
NAMELESS_PREFIX = "TMP"
222
223# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Return the FileCheck variable name for the IR value name `var`.

  All-digit names get NAMELESS_PREFIX prepended; `.` and `-` are replaced by
  `_`, and the result is upper-cased.
  """
  name = NAMELESS_PREFIX + var if var.isdigit() else var
  return name.replace('.', '_').replace('-', '_').upper()
230
231
232# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the FileCheck text that defines the variable for `var`.

  The result has the form `[[NAME:%.*]]`, where NAME comes from
  get_value_name().
  """
  return '[[{}:%.*]]'.format(get_value_name(var))
235
236
237# Use a FileCheck variable.
def get_value_use(var):
  """Return the FileCheck text that uses the variable for `var`.

  The result has the form `[[NAME]]`, where NAME comes from get_value_name().
  """
  return '[[{}]]'.format(get_value_name(var))
240
241# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
  """Rewrite `lines` in place so IR values become FileCheck variables.

  For every line, `%.` is first replaced by `%dot` and IR comments are
  removed. Unless `is_analyze` is true, each IR value is then replaced by a
  FileCheck definition the first time its name is seen and by a FileCheck use
  afterwards; `vars_seen` is the set of names seen so far and is updated.

  Returns the same `lines` list that was passed in.
  """
  def to_filecheck_variable(match):
    # Called for every IR value in a line: names not seen before become
    # definitions, names already seen become uses.
    name = match.group(2)
    if NAMELESS_PREFIX.lower() in name.lower():
      warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (name,))
    if name in vars_seen:
      replacement = get_value_use(name)
    else:
      vars_seen.add(name)
      replacement = get_value_definition(name)
    # re.sub replaces the whole match, so the surrounding whitespace and
    # delimiter have to be handed back as well.
    return match.group(1) + replacement + match.group(3)

  for index, text in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    # Comments are ignored, since the check lines will ignore them too.
    text = SCRUB_IR_COMMENT_RE.sub(r'', text.replace('%.', '%dot'))
    if not is_analyze:
      text = IR_VALUE_RE.sub(to_filecheck_variable, text)
    lines[index] = text
  return lines
273
274
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for `func_name` to `output_lines`.

  Args:
    output_lines: list that receives the generated lines (modified in place).
    comment_marker: comment leader placed at the start of every emitted line.
    prefix_list: sequence of per-RUN-line entries; element [0] of each entry
      is that run line's list of check prefixes.
    func_dict: maps check prefix -> function name -> recorded output. A falsy
      entry means there is no usable output for that prefix.
    func_name: the function to emit checks for.
    check_label_format: %-format string for the label line; it receives the
      check prefix, the function name and the argument/signature text.
    is_asm: when true, body lines are emitted verbatim (blank lines become
      `-EMPTY:` checks) and no variable substitution is done on the body.
    is_analyze: forwarded to genericize_check_lines(); when true, IR value
      names are left untouched.

  For each run line, at most one prefix is used: the first one that is not
  blacklisted, has output, and has not been printed before.
  """
  # prefix_blacklist are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_blacklist = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
        prefix_blacklist |= set(checkprefixes)
        continue

  # prefix_blacklist is constructed, we can now emit the output
  for p in prefix_list:
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      # A prefix that was already printed ends the search for this run line.
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is blacklisted.
      if checkprefix in prefix_blacklist:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      # Variable names are tracked per emitted function, starting with the
      # signature so that the body can refer to arguments defined there.
      vars_seen = set()
      printed_prefixes.append(checkprefix)
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
      # When the signature contains FileCheck variables it is emitted on a
      # separate -SAME line and the label line gets an empty signature.
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
        # Only one prefix is emitted per run line.
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      # True while the most recently visited body line was blank.
      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them: the first line after a
        # blank line gets a plain check rather than a -NEXT check.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT:  {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      # Only one prefix is emitted per run line.
      break
367
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
  """Append IR check lines for `func_name` to `output_lines`.

  Builds the label format from `comment_marker`; when `function_sig` is true
  the label additionally matches the `define ...` text in front of the
  function name. The work is delegated to add_checks() in non-asm mode, with
  `preserve_names` passed as its `is_analyze` argument.
  """
  # Label format is based on IR string.
  if function_sig:
    define_pattern = 'define {{[^@]+}}'
  else:
    define_pattern = ''
  label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, define_pattern)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             label_format, is_asm=False, is_analyze=preserve_names)
375
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Append analysis check lines for `func_name` to `output_lines`.

  The label line quotes the function name in single quotes. The work is
  delegated to add_checks() in non-asm mode with `is_analyze` set.
  """
  label_format = "{} %s-LABEL: '%s%s'".format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             label_format, is_asm=False, is_analyze=True)
379
380
def check_prefix(prefix):
  """Warn when `prefix` is not a well-formed check prefix.

  A prefix is well formed when it matches PREFIX_RE. For a malformed prefix
  that contains a comma, the warning also suggests `--check-prefixes`.
  Nothing is returned and nothing is raised for a malformed prefix.
  """
  if PREFIX_RE.match(prefix):
    return
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only "
             "alphanumeric characters, hyphens and underscores." % (prefix,))
  # The hint embeds the user-supplied prefix, so it is appended after the
  # %-formatting; a `%` inside the prefix must not be read as a format
  # directive.
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
388
389
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes named in the command line `fc_cmd`.

  The command is split on whitespace. A word containing `check-prefix=` has
  its value checked with check_prefix(). A word containing `check-prefixes=`
  has its value split on commas; every entry is checked, and a warning is
  issued for each entry that occurs more than once in that list.
  """
  for word in fc_cmd.split():
    if "check-prefix=" in word:
      check_prefix(word.split('=', 1)[1])
      continue
    if "check-prefixes=" not in word:
      continue
    names = word.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
401          warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))
402
403
def get_autogennote_suffix(parser, args):
  """Build the UTC_ARGS suffix describing the non-default options in `args`.

  Walks the actions registered on `parser` and keeps those that are present
  in `args`, are not in the ignore list (test paths, tool binaries,
  verbosity, ...), and whose value differs from the parser default. A
  constant-storing action is kept only when the value equals its constant.
  Each kept action contributes its first option string, followed by the
  value when the action takes a parameter.

  Returns '' when nothing is kept, otherwise ' ' + UTC_ARGS_KEY + ' ' + the
  space-separated options.
  """
  # Parameters such as paths to the binary or the list of tests are ignored.
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  pieces = []
  for action in parser._actions:
    dest = action.dest
    # Options such as --help are not included in args.
    if not hasattr(args, dest) or dest in ignored_dests:
      continue
    value = getattr(args, dest)
    takes_parameter = action.const is None
    # Skip actions with different constant values (this happens with boolean
    # --foo/--no-foo options).
    if not takes_parameter and value != action.const:
      continue
    if parser.get_default(dest) == value:
      continue  # Don't add default values
    pieces.append(action.option_strings[0])
    if takes_parameter:
      pieces.append('%s' % value)
  if not pieces:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(pieces))
426  return autogenerated_note_args
427
428
def check_for_command(line, parser, args, argv):
  """Apply any UTC_ARGS recorded on `line` to the current arguments.

  When `line` matches UTC_ARGS_CMD, the recorded options are appended to a
  copy of `argv`, and the result is re-parsed with `parser` after removing
  every entry that appears in `args.tests`.

  Returns the tuple `(args, argv)`: the re-parsed namespace and the extended
  argument list, or the inputs unchanged when `line` has no UTC_ARGS.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if not cmd_m:
    return args, argv
  # split() without a separator discards empty fields, so an empty UTC_ARGS
  # or doubled spaces do not produce '' arguments that argparse would treat
  # as positionals.
  argv = argv + cmd_m.group('cmd').split()
  args = parser.parse_args([arg for arg in argv if arg not in args.tests])
  return args, argv
436