xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision a6c59e0792edc46df937b338fe0e68d00cabf90b)
1from __future__ import print_function
2import re
3import string
4import subprocess
5import sys
6import copy
7
# Compatibility shim for string.expandtabs(), which scrub_body() calls as
# string.expandtabs(body, 2). On Python 3 the name `string` is rebound to a
# tiny stand-in class whose expandtabs attribute is str.expandtabs; on Python 2
# the real `string` module is imported.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
13
14##### Common utilities for update_*test_checks.py
15
16
# Module-wide verbosity flag: assigned from the --verbose option by
# parse_commandline_args() and consulted by debug().
_verbose = False
18
def parse_commandline_args(parser):
  """Register the options shared by all update scripts and parse the command line.

  Adds -v/--verbose and -u/--update-only to `parser`, parses the process
  arguments with it, records the verbosity in the module-level `_verbose`
  flag and returns the parsed namespace.
  """
  global _verbose
  common_flags = (
      ('-v', '--verbose', 'Show verbose output'),
      ('-u', '--update-only', 'Only update test if it was already autogened'),
  )
  for short_opt, long_opt, description in common_flags:
    parser.add_argument(short_opt, long_opt, action='store_true',
                        help=description)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
28
def should_add_line_to_output(input_line, prefix_set):
  """Return True if `input_line` of the test should be copied to the output.

  Lines consisting only of a ';' comment marker are dropped, as are existing
  check lines whose prefix is in `prefix_set` (they are regenerated). Truly
  blank lines are kept.
  """
  # Skip any blank comment lines in the IR.
  if input_line.strip() == ';':
    return False
  # And skip any CHECK lines. We're building our own.
  check_match = CHECK_RE.match(input_line)
  return not (check_match and check_match.group(1) in prefix_set)
42
43# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir):
  """Run `exe` with `cmd_args`, feeding the file `ir` on stdin, and return stdout.

  `cmd_args` may be a list (executed directly) or a string (appended to `exe`
  and executed through the shell). The output is returned as text with CRLF
  line endings converted to LF. A non-zero exit status propagates as the
  exception raised by subprocess.check_output.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    run_through_shell = not isinstance(cmd_args, list)
    if run_through_shell:
      command = exe + ' ' + cmd_args
    else:
      command = [exe] + cmd_args
    captured = subprocess.check_output(command, shell=run_through_shell,
                                       stdin=ir_file)
  if sys.version_info[0] > 2:
    # check_output returns bytes on Python 3.
    captured = captured.decode()
  # Normalize Windows CRLF line endings to Unix LF.
  return captured.replace('\r\n', '\n')
58
59##### LLVM IR parser
60
# A RUN line behind a '//', ';' or '#' comment marker; group 1 is the command
# text following 'RUN:'.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A -check-prefix / --check-prefix / --check-prefixes option followed by '=' or
# a space; group 1 is the non-whitespace text that follows.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A well-formed prefix: only ASCII letters, digits, '_' and '-'.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# An existing check line behind a '//', ';' or '#' comment marker; group 1 is
# the prefix with any directive suffix removed. '-EMPTY' must be listed here
# because add_checks() emits '<prefix>-EMPTY:' lines: without it group 1 would
# be '<prefix>-EMPTY', should_add_line_to_output() would not recognise the line
# as one of ours, and stale -EMPTY checks would be kept on every regeneration.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')
65
# A whole IR function definition: 'define ... @<func>(<args>) ... {' up to a
# '}' alone at the end of a line. Named groups: 'func' (function name),
# 'args_and_sig' (parenthesised arguments plus everything before the '{') and
# 'body' (text between the '{' line and the closing '}').
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w\.\-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# A header of the form "'<analysis>' for function '<func>':" followed by the
# body. Named groups: 'analysis', 'func' and 'body'. Compiled without re.M, so
# '^' anchors only at the start of the searched string and 'body' (with re.S)
# runs to the end of it.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The opening of a 'define' line; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
# A 'target triple = "..."' line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march options given with '=' or a space; group 1 is the value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
80
# Whitespace at the very start of the string (no re.M); group 1 is that run.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# A run of spaces/tabs. The negative lookahead rejects any match that would
# begin at the start of a line (re.M), so the first character of a line's
# indentation is never part of a match.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of each line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias used by scrub_body().
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# A trailing run of spaces/tabs and '#<digits>' tokens at the end of each line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A '# kill:' comment line including its newline.
# NOTE(review): compiled without re.M, so '^' only anchors at the start of the
# string being scrubbed -- confirm that is intended.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# '# =>This Inner Loop Header:' and '# in Loop:' comments, up to end of line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
89
90
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr, appending ': <test_file>' when given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
95
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr, appending ': <test_file>' when given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
100
def debug(*args, **kwargs):
  """print() the arguments, to stderr unless `file` is given, when verbose.

  Does nothing unless the module-level `_verbose` flag is set.
  """
  if not _verbose:
    return
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs), so the
  # default destination is filled in here.
  kwargs.setdefault('file', sys.stderr)
  print(*args, **kwargs)
107
def find_run_lines(test, lines):
  """Collect the RUN commands found in `lines`, joining continued lines.

  A RUN line ending in a backslash is merged with the next RUN line: the
  trailing backslashes are removed and the two are joined with one space.
  `test` is only used in debug output. Returns the list of command strings.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for run_match in filter(None, map(RUN_LINE_RE.match, lines)):
    command_text = run_match.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      # Previous RUN line asked for a continuation.
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + command_text
    else:
      run_lines.append(command_text)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for command in run_lines:
    debug('  RUN: {}'.format(command))
  return run_lines
122
def scrub_body(body):
  """Normalize whitespace in `body` and return the result.

  Applies, in order: SCRUB_WHITESPACE_RE (each matched run of spaces/tabs
  becomes a single space), tab expansion with a tab size of 2, and removal of
  trailing whitespace on every line.
  """
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  detabbed = string.expandtabs(collapsed, 2)
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', detabbed)
132
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber(body, *scrubber_args)` with `extra_scrub` set to `extra`.

  When `scrubber_args` is non-empty it is deep-copied first and the copy's
  first element gets its `extra_scrub` attribute set, so the caller's
  arguments are never modified. Returns whatever `scrubber` returns.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
139
# Record of one function's scrubbed output, as stored in the function
# dictionary built by build_function_body_dictionary().
class function_body(object):
  """Scrubbed body, extra-scrubbed body and signature text of one function."""

  def __init__(self, string, extra, args_and_sig):
    # `scrub` is what str() returns; `extrascrub` is the body scrubbed with
    # extra_scrub enabled; `args_and_sig` is the recorded signature text.
    self.scrub = string
    self.extrascrub = extra
    self.args_and_sig = args_and_sig

  def is_same_except_arg_names(self, extrascrub, args_and_sig):
    """Return True if this body and the given one differ only in argument names.

    The signatures must be equal once every IR value name is removed; the
    extra-scrubbed bodies must then be equal once the names collected from
    both signatures are removed and IR comments are stripped.
    """
    arg_names = set()

    def drop_arg_names(match):
      # Remember the name, then delete it from the text.
      arg_names.add(match.group(2))
      return match.group(1) + match.group(3)

    def repl_arg_names(match):
      # Delete only names that were seen in a signature.
      lead, name, tail = match.group(1), match.group(2), match.group(3)
      if name in arg_names:
        return lead + tail
      return lead + name + tail

    own_sig = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
    if own_sig != other_sig:
      return False

    def normalize(text):
      return SCRUB_IR_COMMENT_RE.sub(r'', IR_VALUE_RE.sub(repl_arg_names, text))

    return normalize(self.extrascrub) == normalize(extrascrub)

  def __str__(self):
    return self.scrub
167
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
  """Record every function found in `raw_tool_output` under each prefix.

  For each match of `function_re`, the body is scrubbed twice (without and
  with extra scrubbing) and stored in `func_dict[prefix][func]` as a
  function_body for every prefix in `prefixes`. When a prefix already holds a
  different body for the function:
    * if the two differ only in argument names, the existing entry is switched
      to the extra-scrubbed body and the new signature;
    * otherwise the entry is set to None, except for the last prefix, where a
      warning is printed and the entry is overwritten.
  `record_args` selects whether the scrubbed signature text or just '(' is
  stored when the regex has an 'args_and_sig' group. `func_dict` is modified
  in place; nothing is returned.
  """
  for m in function_re.finditer(raw_tool_output):
    func = m.group('func')
    body = m.group('body')
    groups = m.groupdict()

    # Determine if we print arguments, the opening brace, or nothing after the function name
    has_sig = 'args_and_sig' in groups
    if record_args and has_sig:
      args_and_sig = scrub_body(m.group('args_and_sig').strip())
    elif has_sig:
      args_and_sig = '('
    else:
      args_and_sig = ''

    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)

    if 'analysis' in groups:
      analysis = m.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))

    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])

    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for body_line in scrubbed_body.splitlines():
        print('  ' + body_line, file=sys.stderr)

    for prefix in prefixes:
      per_prefix = func_dict[prefix]
      if func in per_prefix:
        existing = per_prefix[func]
        conflicts = (str(existing) != scrubbed_body or
                     (existing and existing.args_and_sig != args_and_sig))
        if conflicts:
          if existing and existing.is_same_except_arg_names(scrubbed_extra, args_and_sig):
            # Same code modulo argument names: keep the entry, but check the
            # extra-scrubbed form.
            existing.scrub = scrubbed_extra
            existing.args_and_sig = args_and_sig
            continue
          if prefix != prefixes[-1]:
            # Mark the prefix as unusable for this function.
            per_prefix[func] = None
            continue
          warn('Found conflicting asm under the same prefix: %r!' % (prefix,))

      per_prefix[func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)
208
209##### Generator of LLVM IR CHECK lines
210
# An IR comment: optional whitespace, a ';' and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers ('%name' preceded by whitespace), but
# only if they are followed by whitespace, a comma, a parenthesis, or the end
# of the string. Group 1 is the leading whitespace, group 2 the name without
# the '%', group 3 the terminator.
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.\-]+?)([,\s\(\)]|\Z)')
216
217# Create a FileCheck variable name based on an IR name.
def get_value_name(var):
  """Return the FileCheck variable name for IR value name `var`.

  All-digit names get a 'TMP' prefix; '.' and '-' become '_'; the result is
  upper-cased.
  """
  prefix = 'TMP' if var.isdigit() else ''
  return (prefix + var).replace('.', '_').replace('-', '_').upper()
224
225
226# Create a FileCheck variable from regex.
def get_value_definition(var):
  """Return the FileCheck text that defines the variable for `var`."""
  return '[[{}:%.*]]'.format(get_value_name(var))
229
230
231# Use a FileCheck variable.
def get_value_use(var):
  """Return the FileCheck text that refers to the variable for `var`."""
  return '[[{}]]'.format(get_value_name(var))
234
235# Replace IR value defs and uses with FileCheck variables.
def genericize_check_lines(lines, is_analyze, vars_seen):
  """Rewrite `lines` in place for use as check lines and return the same list.

  Every line has '%.' replaced by '%dot' and IR comments stripped. Unless
  `is_analyze` is true, each IR value is additionally replaced by a FileCheck
  variable: a definition the first time its name is met, a use afterwards.
  `vars_seen` is the set of names already defined; it is updated in place.
  """
  def transform_line_vars(match):
    # Called for each IR value in a line: names not seen before become
    # definitions, the others become uses.
    name = match.group(2)
    if name in vars_seen:
      replacement = get_value_use(name)
    else:
      vars_seen.add(name)
      replacement = get_value_definition(name)
    # re.sub replaces the whole match, so hand back the surrounding
    # whitespace and terminator as well.
    return match.group(1) + replacement + match.group(3)

  for index, raw_line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    # Ignore any comments, since the check lines will too.
    text = SCRUB_IR_COMMENT_RE.sub(r'', raw_line.replace('%.', '%dot'))
    lines[index] = text if is_analyze else IR_VALUE_RE.sub(transform_line_vars, text)
  return lines
265
266
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for function `func_name` to `output_lines`.

  Args:
    output_lines: list of strings; extended in place.
    comment_marker: comment leader placed at the start of every emitted line.
    prefix_list: sequence of entries whose element 0 is a list of check
      prefixes (one entry per run line, going by the comments below); no other
      element is read here.
    func_dict: mapping prefix -> {function name -> recorded body or None}.
    func_name: function to emit checks for.
    check_label_format: %-format string taking (prefix, function name,
      signature text) and producing the -LABEL line.
    is_asm: when true, body lines are emitted verbatim (first line as a plain
      check, later lines as -NEXT, blank lines as -EMPTY); otherwise the body
      is passed through genericize_check_lines() first.
    is_analyze: forwarded to genericize_check_lines(); when true, IR value
      names are not replaced by FileCheck variables.

  For each entry of `prefix_list`, checks are emitted for at most one prefix:
  the first one that is not blacklisted, not already printed, and has (or can
  borrow) output for the function.
  """
  # prefix_blacklist are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_blacklist = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
        prefix_blacklist |= set(checkprefixes)
        continue

  # prefix_blacklist is constructed, we can now emit the output
  for p in prefix_list:
    checkprefixes = p[0]
    saved_output = None
    for checkprefix in checkprefixes:
      # A prefix that was already printed ends the search for this entry.
      if checkprefix in printed_prefixes:
        break

      # prefix is blacklisted. We remember the output as we might need it later but we will not emit anything for the prefix.
      if checkprefix in prefix_blacklist:
          if not saved_output and func_name in func_dict[checkprefix]:
              saved_output = func_dict[checkprefix][func_name]
          continue

      # If we do not have output for this prefix but there is one saved, we go ahead with this prefix and the saved output.
      if not func_dict[checkprefix][func_name]:
        if not saved_output:
            continue
        func_dict[checkprefix][func_name] = saved_output

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      # Names seen in the signature are shared with the body below, so an
      # argument defined on the -SAME line is a use inside the body.
      vars_seen = set()
      printed_prefixes.append(checkprefix)
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = genericize_check_lines([args_and_sig], is_analyze, vars_seen)[0]
      # A signature containing FileCheck variables goes on its own -SAME line;
      # otherwise it is folded into the -LABEL line.
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = genericize_check_lines(func_body, is_analyze, vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them: the line following a
        # blank line is emitted as a plain check rather than a -NEXT check.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT:  {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      break
364
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
  """Append IR check lines for `func_name` to `output_lines`.

  Builds the -LABEL format ('@<name><signature>', preceded by a
  'define {{[^@]+}}' pattern when `function_sig` is true) and delegates to
  add_checks() in non-asm mode. `preserve_names` is passed as add_checks()'s
  `is_analyze` argument.
  """
  # Label format is based on IR string.
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  else:
    function_def_regex = ''
  label_template = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format=label_template, is_asm=False,
             is_analyze=preserve_names)
372
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Append analysis check lines for `func_name` to `output_lines`.

  The -LABEL line quotes the function name; add_checks() is called in
  non-asm mode with `is_analyze` set.
  """
  label_template = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format=label_template, is_asm=False, is_analyze=True)
376
377
def check_prefix(prefix):
  """Warn if `prefix` is not a valid FileCheck prefix.

  A valid prefix matches PREFIX_RE (letters, digits, '_' and '-'). When the
  rejected text contains a comma, the warning also suggests the
  --check-prefixes spelling. Nothing is returned and nothing is raised.
  """
  if PREFIX_RE.match(prefix):
    return
  # Apply %-formatting to the fixed message only and append the hint
  # afterwards. The hint embeds the user-supplied text, so formatting the
  # concatenation would treat any '%' in the prefix as a conversion specifier
  # and raise instead of warning.
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
385
386
def verify_filecheck_prefixes(fc_cmd):
  """Validate the prefixes given in a FileCheck command line.

  `fc_cmd` is split on whitespace. Every value given with 'check-prefix=' and
  every comma-separated value given with 'check-prefixes=' is passed to
  check_prefix(). A value that appears more than once in a single
  'check-prefixes=' list triggers one "not unique" warning (reported at its
  first occurrence).
  """
  for part in fc_cmd.split():
    if "check-prefix=" in part:
      check_prefix(part.split('=', 1)[1])
    elif "check-prefixes=" in part:
      prefixes = part.split('=', 1)[1].split(',')
      # Duplicates already reported for this list, so each one is warned
      # about once rather than once per occurrence.
      reported = set()
      for prefix in prefixes:
        check_prefix(prefix)
        if prefix not in reported and prefixes.count(prefix) > 1:
          reported.add(prefix)
          warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))
398          warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))
399