xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision f50bc823fe6f4279eb2f426dd54f3151878c0216)
1from __future__ import print_function
2import re
3import string
4import subprocess
5import sys
6import copy
7
# On Python 3, shadow the `string` module imported above with a tiny class that
# only exposes expandtabs (bound to str.expandtabs), so the rest of this file
# can call string.expandtabs(text, tabsize) on either major version.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
13
14##### Common utilities for update_*test_checks.py
15
16
# Module-wide verbosity flag: assigned by parse_commandline_args() from the
# --verbose option and consulted by debug() before it prints anything.
_verbose = False
18
def parse_commandline_args(parser):
  """Add the options common to all update scripts to `parser` and parse.

  Registers -v/--verbose and -u/--update-only as boolean flags, calls
  parser.parse_args() with no explicit argument list, stores the verbose
  setting in the module-level `_verbose` flag and returns the parsed result.
  """
  global _verbose
  common_flags = (
      (('-v', '--verbose'), 'Show verbose output'),
      (('-u', '--update-only'), 'Only update test if it was already autogened'),
  )
  for option_names, help_text in common_flags:
    parser.add_argument(*option_names, action='store_true', help=help_text)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
28
def should_add_line_to_output(input_line, prefix_set):
  """Return True if `input_line` should be kept in the regenerated test.

  A line is dropped when it is a blank IR comment (just ';') or when it is a
  CHECK line whose prefix is in `prefix_set`; everything else, including
  blank lines, is kept.
  """
  # Blank comment lines in the IR are never carried over.
  if input_line.strip() == ';':
    return False
  # CHECK lines for our own prefixes are regenerated, so the old ones go.
  check_match = CHECK_RE.match(input_line)
  is_stale_check = check_match is not None and check_match.group(1) in prefix_set
  return not is_stale_check
42
def invoke_tool(exe, cmd_args, ir):
  """Run the tool under test with the file `ir` on stdin; return its stdout.

  `cmd_args` may be a list (run directly, without a shell) or a string (the
  command line `exe + ' ' + cmd_args` is handed to the shell). The output is
  decoded to text on Python 3 and '\\r\\n' line endings are turned into '\\n'.
  subprocess.check_output raises CalledProcessError on a non-zero exit code.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    if isinstance(cmd_args, list):
      command, via_shell = [exe] + cmd_args, False
    else:
      command, via_shell = exe + ' ' + cmd_args, True
    raw_output = subprocess.check_output(command, shell=via_shell,
                                         stdin=ir_file)
  output = raw_output.decode() if sys.version_info[0] > 2 else raw_output
  # Normalize line endings to the unix LF style.
  return output.replace('\r\n', '\n')
58
59##### LLVM IR parser
60
# A RUN line: a '//', ';' or '#' comment marker, then 'RUN:'; group 1 is the
# command text that follows.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A -check-prefix/--check-prefixes option; group 1 is its (unsplit) value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A valid check prefix: only letters, digits, underscores and hyphens.
PREFIX_RE = re.compile(r'^[a-zA-Z0-9_-]+$')
# An existing check line; group 1 is the prefix with any -NEXT/-NOT/-DAG/
# -LABEL/-SAME/-EMPTY suffix removed.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
# Everything after UTC_ARGS_KEY on a line is captured as 'cmd'. Both pattern
# pieces are raw strings so that '\s' is not an invalid string escape.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')

# A complete 'define ... { ... }' function in tool output, with named groups
# 'func', 'args_and_sig' and 'body'.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# An "'<analysis>' for function '<func>':" header followed by its body, with
# named groups 'analysis', 'func' and 'body'.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of an IR function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@([\w.-]+)\s*\(')
# 'target triple = "..."' in IR, and -mtriple / -march on a command line;
# group 1 is the value in each case.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Patterns used when scrubbing tool output before it is turned into checks.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)
93
94
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr, followed by ': <test_file>' if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
99
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr, followed by ': <test_file>' if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
104
def debug(*args, **kwargs):
  """print() the arguments, but only when the module-level `_verbose` is set.

  Output goes to stderr unless the caller passes an explicit file= keyword.
  """
  if not _verbose:
    return
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  print(*args, **kwargs)
111
def find_run_lines(test, lines):
  """Collect the RUN commands found in `lines`, joining continuations.

  A RUN line that ends in a backslash is merged with the next RUN line: the
  trailing backslashes are removed and the two parts are joined with a single
  space. `test` is only used in the debug output. Returns a list of strings.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for line in lines:
    run_match = RUN_LINE_RE.match(line)
    if not run_match:
      continue
    fragment = run_match.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      # The previous RUN line is continued by this one.
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + fragment
    else:
      run_lines.append(fragment)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for run_line in run_lines:
    debug('  RUN: {}'.format(run_line))
  return run_lines
126
def scrub_body(body):
  """Normalize the whitespace of `body` and return the result.

  Three steps, in order: substitute a single space for every match of
  SCRUB_WHITESPACE_RE (which is written to leave leading whitespace alone),
  expand tabs to 2 columns, and strip trailing spaces/tabs from every line.
  """
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Tabs that survive are the ones used for indentation.
  untabbed = string.expandtabs(collapsed, 2)
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', untabbed)
136
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call scrubber(body, *scrubber_args) with `extra_scrub` set to `extra`.

  When `scrubber_args` is non-empty it is deep-copied first and the flag is
  set on the copy's first element, so the caller's arguments are not
  modified. With empty `scrubber_args` the scrubber is called as-is.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
143
# One entry of the function-body dictionary: the scrubbed output recorded for
# a single function under a single check prefix.
class function_body(object):
  def __init__(self, string, extra, args_and_sig):
    # Scrubbed body text; this is what str() returns.
    self.scrub = string
    # Body text scrubbed with the extra_scrub flag enabled.
    self.extrascrub = extra
    # Text that follows the function name on its label line.
    self.args_and_sig = args_and_sig

  def is_same_except_arg_names(self, extrascrub, args_and_sig):
    """Return True if the given body/signature differ only in argument names.

    Value names are removed from both signatures (and remembered); the
    signatures must then be equal. The same remembered names are then removed
    from both extra-scrubbed bodies, IR comments are stripped, and the
    results are compared.
    """
    arg_names = set()

    def strip_and_record(match):
      # Drop the value name but remember it for the body comparison.
      arg_names.add(match.group(2))
      return match.group(1) + match.group(3)

    def strip_if_recorded(match):
      before, name, after = match.group(1), match.group(2), match.group(3)
      if name in arg_names:
        return before + after
      return before + name + after

    own_sig = IR_VALUE_RE.sub(strip_and_record, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(strip_and_record, args_and_sig)
    if own_sig != other_sig:
      return False

    own_body, other_body = [
        SCRUB_IR_COMMENT_RE.sub(r'', IR_VALUE_RE.sub(strip_if_recorded, text))
        for text in (self.extrascrub, extrascrub)]
    return own_body == other_body

  def __str__(self):
    return self.scrub
171
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
  """Record the scrubbed body of every function in `raw_tool_output`.

  function_re:     compiled regex with named groups 'func' and 'body', and
                   optionally 'args_and_sig' and 'analysis'.
  scrubber, scrubber_args: forwarded to do_scrub() to clean each body, once
                   with extra scrubbing disabled and once with it enabled.
  raw_tool_output: text that function_re is applied to.
  prefixes:        check prefixes of the RUN line that produced the output.
  func_dict:       func_dict[prefix][func] is set to a function_body, or to
                   None when outputs recorded under a prefix conflict.
  verbose:         if true, echo each processed function to stderr.
  record_args:     if true (and the regex has 'args_and_sig'), store the
                   scrubbed signature; otherwise store '(' or ''.

  Nothing is returned; `func_dict` is updated in place.
  """
  # finditer() only yields match objects, so no per-match validity check is
  # needed here.
  for m in function_re.finditer(raw_tool_output):
    groups = m.groupdict()
    func = m.group('func')
    body = m.group('body')
    # Determine if we print arguments, the opening brace, or nothing after the function name
    has_args_and_sig = 'args_and_sig' in groups
    if record_args and has_args_and_sig:
      args_and_sig = scrub_body(m.group('args_and_sig').strip())
    elif has_args_and_sig:
      args_and_sig = '('
    else:
      args_and_sig = ''
    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
    if 'analysis' in groups:
      analysis = m.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for l in scrubbed_body.splitlines():
        print('  ' + l, file=sys.stderr)
    for prefix in prefixes:
      prefix_funcs = func_dict[prefix]
      if func in prefix_funcs:
        existing = prefix_funcs[func]
        # `existing` is None when an earlier conflict was recorded.
        if str(existing) != scrubbed_body or (existing and existing.args_and_sig != args_and_sig):
          if existing and existing.is_same_except_arg_names(scrubbed_extra, args_and_sig):
            # Only argument names differ: switch the stored entry over to
            # the extra-scrubbed body.
            existing.scrub = scrubbed_extra
            existing.args_and_sig = args_and_sig
            continue
          if prefix == prefixes[-1]:
            # Warn, then fall through and overwrite the entry below.
            warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
          else:
            # Mark the conflict so this prefix is not used for the function.
            prefix_funcs[func] = None
            continue

      prefix_funcs[func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)
212
213##### Generator of LLVM IR CHECK lines
214
# Matches a ';' comment up to the end of its line, together with any
# whitespace immediately before the ';'.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are followed by
# spaces, commas, paren, or end of the string.
# Group 1 is the leading whitespace, group 2 the name without its '%', and
# group 3 the character (or end of string) that follows the name.
IR_VALUE_RE = re.compile(r'(\s+)%([\w.-]+?)([,\s\(\)]|\Z)')
220
# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Return `var` upper-cased with '.' and '-' replaced by '_'.

  Names made up only of digits are prefixed with 'TMP' first.
  """
  name = 'TMP' + var if var.isdigit() else var
  return name.replace('.', '_').replace('-', '_').upper()
228
229
# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the FileCheck definition '[[NAME:%.*]]' for IR value `var`."""
  return '[[{}:%.*]]'.format(get_value_name(var))
233
234
# Use a FileCheck variable.
def get_value_use(var):
  """Return the FileCheck use '[[NAME]]' for IR value `var`."""
  return '[[{}]]'.format(get_value_name(var))
238
# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
  """Rewrite `lines` in place for use as check lines and return the list.

  lines:      list of strings; every element is replaced by its rewritten
              form.
  is_analyze: if true, lines only get '%.' renamed and IR comments removed;
              IR value names are left untouched.
  vars_seen:  set of value names already defined; names met for the first
              time are added to it.
  """
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = match.group(2)
    if var in vars_seen:
      rv = get_value_use(var)
    else:
      vars_seen.add(var)
      rv = get_value_definition(var)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(3)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    if is_analyze:
      lines[i] = scrubbed_line
    else:
      lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
  return lines
269
270
def _collect_blacklisted_prefixes(prefix_list, func_dict, func_name):
  """Return the check prefixes that cannot be used to print func_name."""
  blacklist = set()
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(func_name not in func_dict[checkprefix] for checkprefix in checkprefixes):
      blacklist |= set(checkprefixes)
  return blacklist


def _append_asm_body_checks(output_lines, comment_marker, checkprefix, func_body):
  """Append check lines for an asm body: first line plain, the rest -NEXT/-EMPTY."""
  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
  for func_line in func_body[1:]:
    if func_line.strip() == '':
      output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
    else:
      output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))


def _append_ir_body_checks(output_lines, comment_marker, checkprefix, func_body):
  """Append check lines for an IR/analysis body, then a bare comment marker."""
  is_blank_line = False
  for func_line in func_body:
    # Blank lines are not checked; they only make the next check line a
    # plain check instead of a -NEXT check.
    if func_line.strip() == '':
      is_blank_line = True
      continue
    # Do not waste time checking IR comments.
    func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

    if is_blank_line:
      output_lines.append('{} {}:       {}'.format(
          comment_marker, checkprefix, func_line))
    else:
      output_lines.append('{} {}-NEXT:  {}'.format(
          comment_marker, checkprefix, func_line))
    is_blank_line = False

  # Add space between different check prefixes and also before the first
  # line of code in the test function.
  output_lines.append(comment_marker)


def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for `func_name` to `output_lines`.

  output_lines:       list of strings that is appended to.
  comment_marker:     comment token placed at the start of every check line.
  prefix_list:        one entry per run line; element [0] of each entry is
                      that run line's list of check prefixes.
  func_dict:          func_dict[prefix][func_name] is a function_body, or a
                      false value when there is no usable output.
  check_label_format: %-format taking (prefix, func_name, args_and_sig).
  is_asm:             emit the body verbatim instead of replacing IR values
                      with FileCheck variables.
  is_analyze:         forwarded to genericize_check_lines().

  For each run line, the first check prefix that is usable and has output is
  printed; handling of a run line stops as soon as one of its prefixes has
  already been printed.
  """
  # prefix_blacklist are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_blacklist = _collect_blacklisted_prefixes(prefix_list, func_dict, func_name)
  printed_prefixes = []

  for p in prefix_list:
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is blacklisted.
      if checkprefix in prefix_blacklist:
        continue

      # If we do not have output for this prefix we skip it.
      func_info = func_dict[checkprefix][func_name]
      if not func_info:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm and len(printed_prefixes) != 0:
        output_lines.append(comment_marker)

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      args_and_sig = str(func_info.args_and_sig)
      args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
      if '[[' in args_and_sig:
        # The signature defines FileCheck variables, so it goes on its own
        # -SAME line after the label.
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_info).splitlines()

      if is_asm:
        # For ASM output, just emit the check lines.
        _append_asm_body_checks(output_lines, comment_marker, checkprefix, func_body)
      else:
        # For IR output, change all defs to FileCheck variables, so we're immune
        # to variable naming fashions.
        func_body = genericize_check_lines(func_body, is_analyze, vars_seen)
        _append_ir_body_checks(output_lines, comment_marker, checkprefix, func_body)
      break
363
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
  """Append IR check lines for `func_name` via add_checks().

  The label line checks '@<name>'; with `function_sig` set it is prefixed by
  a 'define {{[^@]+}}' pattern. `preserve_names` is passed to add_checks() as
  its is_analyze argument, and is_asm is always False.
  """
  # Label format is based on IR string.
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  else:
    function_def_regex = ''
  label_template = '{} %s-LABEL: {}@%s%s'.format(comment_marker,
                                                  function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             label_template, False, preserve_names)
371
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Append analysis check lines for `func_name` via add_checks().

  The label line checks the quoted function name; add_checks() is called with
  is_asm=False and is_analyze=True.
  """
  label_template = "{} %s-LABEL: '%s%s'".format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             label_template, False, True)
375
376
def check_prefix(prefix):
  """Warn if `prefix` is not a valid check prefix.

  A valid prefix matches PREFIX_RE. When the invalid prefix contains a comma,
  the warning also suggests the --check-prefixes= spelling.
  """
  if PREFIX_RE.match(prefix):
    return
  # Format the fixed part of the message first and append the hint afterwards:
  # the hint contains the raw prefix, which must not be interpreted as part of
  # the format string (a '%' in the prefix would otherwise break formatting).
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
384
385
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes given in the FileCheck command `fc_cmd`.

  Each whitespace-separated word containing 'check-prefix=' or
  'check-prefixes=' has its value(s) passed to check_prefix(); for the
  comma-separated form a warning is also issued for every entry that occurs
  more than once in the list.
  """
  for word in fc_cmd.split():
    if "check-prefix=" in word:
      check_prefix(word.split('=', 1)[1])
      continue
    if "check-prefixes=" not in word:
      continue
    prefixes = word.split('=', 1)[1].split(',')
    for prefix in prefixes:
      check_prefix(prefix)
      if prefixes.count(prefix) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))
398
399
def get_autogennote_suffix(parser, args):
  """Return the ' UTC_ARGS: ...' suffix describing the non-default options.

  Walks the parser's registered actions and lists the first option string of
  every action whose value in `args` differs from the parser default,
  followed by the value when the action takes a parameter. Returns '' when
  there is nothing to list.
  """
  # Parameters such as paths to the binary or the list of tests are ignored.
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  tokens = []
  for action in parser._actions:
    dest = action.dest
    # Options such as --help are not included in args at all.
    if not hasattr(args, dest) or dest in ignored_dests:
      continue
    value = getattr(args, dest)
    takes_parameter = action.const is None
    # For constant-storing actions, skip those with a different constant
    # value (this happens with boolean --foo/--no-foo options).
    if not takes_parameter and value != action.const:
      continue
    # Default values are not recorded.
    if parser.get_default(dest) == value:
      continue
    tokens.append(action.option_strings[0])
    if takes_parameter:
      tokens.append('%s' % value)
  if not tokens:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(tokens))
423
424
def check_for_command(line, parser, args, argv):
  """Apply a 'UTC_ARGS:' directive found in `line`, if there is one.

  When `line` matches UTC_ARGS_CMD, the options after the key are appended to
  a copy of `argv` and the combined list, minus any entries that are in
  args.tests, is parsed again with `parser`. Returns the (possibly updated)
  (args, argv) pair; the caller's `argv` list is not modified.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if cmd_m:
    # split() without a separator drops empty strings, so an empty directive
    # or repeated spaces do not add '' arguments.
    cmd = cmd_m.group('cmd').split()
    argv = argv + cmd
    # Build the filtered list eagerly, while `args` still refers to the
    # previous parse result.
    filtered_argv = [arg for arg in argv if arg not in args.tests]
    args = parser.parse_args(filtered_argv)
  return args, argv
432