xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision d9542db49e90457de62af3bfe395aaf4c47b68a5)
1from __future__ import print_function
2import re
3import string
4import subprocess
5import sys
6import copy
7
# Compatibility shim for string.expandtabs(). On Python 3 the name `string`
# is rebound to a tiny class whose only attribute is str.expandtabs, so the
# rest of this file can call string.expandtabs(text, tabsize) on either major
# version. On Python 2 the real `string` module is used.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
13
14##### Common utilities for update_*test_checks.py
15
16
# Module-wide verbosity flag: assigned by parse_commandline_args() and read by
# debug() to decide whether to print.
_verbose = False
18
def parse_commandline_args(parser):
  """Add the options common to the update scripts to `parser` and parse argv.

  Registers -v/--verbose and -u/--update-only (both boolean flags), parses
  the command line via parser.parse_args(), stores the verbose setting in the
  module-level _verbose flag, and returns the parsed namespace.
  """
  global _verbose
  shared_flags = (
      (('-v', '--verbose'), 'Show verbose output'),
      (('-u', '--update-only'),
       'Only update test if it was already autogened'),
  )
  for flag_names, help_text in shared_flags:
    parser.add_argument(*flag_names, action='store_true', help=help_text)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
28
def should_add_line_to_output(input_line, prefix_set):
  """Return True if `input_line` of the test should be copied to the output.

  Dropped lines are: a comment line consisting only of ';', and any existing
  check line (per CHECK_RE) whose prefix is in `prefix_set`, since those are
  regenerated. Blank lines are kept.
  """
  # A lone ';' is an empty IR comment; do not carry it over.
  if input_line.strip() == ';':
    return False
  # Existing CHECK lines for our prefixes are stale; we build our own.
  check_match = CHECK_RE.match(input_line)
  return not (check_match and check_match.group(1) in prefix_set)
42
def invoke_tool(exe, cmd_args, ir):
  """Run the tool under test with the file `ir` on stdin; return its stdout.

  `cmd_args` may be a list (executed directly as [exe] + cmd_args) or a
  string (executed through the shell as exe + ' ' + cmd_args). The output is
  decoded to text on Python 3 and Windows CRLF line endings are converted to
  unix LF. subprocess.check_output raises CalledProcessError if the tool
  exits with a non-zero status.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    if isinstance(cmd_args, list):
      command, via_shell = [exe] + cmd_args, False
    else:
      command, via_shell = exe + ' ' + cmd_args, True
    output = subprocess.check_output(command, shell=via_shell, stdin=ir_file)
  if sys.version_info[0] > 2:
    # check_output returns bytes on Python 3.
    output = output.decode()
  return output.replace('\r\n', '\n')
58
59##### LLVM IR parser
60
# A RUN line: optional indentation, a comment leader ('//', ';' or '#'), then
# 'RUN:'. Group 1 is the command text after 'RUN:'.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A FileCheck -check-prefix / --check-prefix(es) option followed by '=' or a
# space. Group 1 is the (non-whitespace) option value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A valid check prefix: only letters, digits, underscores and hyphens.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# An existing check line: a ';' or '#' comment whose text up to the first ':'
# is a prefix, optionally followed by one of the listed suffixes. Group 1 is
# the prefix without the suffix. Unlike RUN_LINE_RE, '//' comments are not
# accepted here.
CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME)?:')

# An IR function definition in tool output, from 'define' to the closing '}'
# on a line of its own. Named groups: 'func' (name after '@'), 'args_and_sig'
# (the parenthesized argument list plus everything up to the '{'), and 'body'
# (the text between '{\n' and the closing brace).
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w\.\-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Analysis output of the form "'<analysis>' for function '<func>':" followed
# by the body. Named groups: 'analysis', 'func', 'body'. re.M is not set, so
# '^' anchors only at the start of the searched string and, with re.S, 'body'
# extends to the end of that string.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of an IR function definition on a single line; group 1 is the
# function name (word characters only).
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
# A 'target triple = "..."' line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# '-mtriple' / '-march' command-line options followed by '=' or a space;
# group 1 is the value up to the next space.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Whitespace at the very start of the string (no re.M).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# A run of spaces/tabs; the negative lookahead prevents a match from starting
# exactly at the beginning of a line.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# A '# kill:' comment line including its newline.
# NOTE(review): re.M is not set, so '^' only matches at the start of the
# string -- confirm that is intended.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# Loop annotations in asm comments, from the marker text to the end of the line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
87
88
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr; append ': <test_file>' when one is given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
93
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr; append ': <test_file>' when one is given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
98
def debug(*args, **kwargs):
  """print() wrapper that only prints when the module-level _verbose is set.

  Output goes to stderr unless the caller passes an explicit file= keyword.
  """
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  if not _verbose:
    return
  kwargs.setdefault('file', sys.stderr)
  print(*args, **kwargs)
105
def find_run_lines(test, lines):
  """Collect the RUN commands of a test, joining backslash continuations.

  Args:
    test: name of the test file; only used in debug output.
    lines: iterable of the test file's lines.

  Returns:
    A list of command strings, one per logical RUN line. A RUN line ending in
    a backslash is merged with the following RUN line: the trailing
    backslash(es) are removed and the two parts are joined with one space.
  """
  debug('Scanning for RUN lines in test file:', test)
  raw_lines = [m.group(1)
               for m in (RUN_LINE_RE.match(l) for l in lines) if m]
  run_lines = []
  for l in raw_lines:
    if run_lines and run_lines[-1].endswith('\\'):
      # Strip only the continuation backslash, then append the next line.
      # (Passing the next line to rstrip() would treat its characters as a
      # strip set and discard the continued text.)
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
    else:
      run_lines.append(l)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for l in run_lines:
    debug('  RUN: {}'.format(l))
  return run_lines
120
def scrub_body(body):
  """Normalize whitespace in `body` and return the result.

  Applies, in order: SCRUB_WHITESPACE_RE (runs of spaces/tabs become a single
  space, leaving leading whitespace in place), tab expansion with a tab size
  of 2, and removal of trailing whitespace on every line.
  """
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  detabbed = string.expandtabs(collapsed, 2)
  return SCRUB_TRAILING_WHITESPACE_RE.sub(r'', detabbed)
130
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call scrubber(body, *scrubber_args) with the extra_scrub flag set to `extra`.

  When `scrubber_args` is non-empty, a deep copy is made and `extra` is stored
  on the copy's first element as .extra_scrub, so the caller's arguments are
  never modified. When it is empty, the scrubber is called as-is.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
137
138# Build up a dictionary of all the function bodies.
class function_body(object):
  """The scrubbed output recorded for one function.

  Attributes:
    scrub: the scrubbed body text; this is what str() returns.
    extrascrub: the body as produced by the scrubber with extra scrubbing on.
    args_and_sig: the text emitted after the function name in the label.
  """

  def __init__(self, string, extra, args_and_sig):
    self.scrub = string
    self.extrascrub = extra
    self.args_and_sig = args_and_sig

  def is_same_except_arg_names(self, extrascrub, args_and_sig):
    """Return True if this body equals the given one up to argument names.

    The two signatures must be identical once every IR value name is removed.
    The extra-scrubbed bodies are then compared with every value name that
    appeared in either signature removed, and with IR comments stripped.
    """
    signature_names = set()

    def strip_and_record(match):
      # Remove the value from the signature, remembering its name.
      signature_names.add(match.group(2))
      return match.group(1) + match.group(3)

    def strip_if_recorded(match):
      # Remove only values whose name was seen in one of the signatures.
      kept_name = '' if match.group(2) in signature_names else match.group(2)
      return match.group(1) + kept_name + match.group(3)

    own_sig = IR_VALUE_RE.sub(strip_and_record, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(strip_and_record, args_and_sig)
    if own_sig != other_sig:
      return False

    def normalize(text):
      without_args = IR_VALUE_RE.sub(strip_if_recorded, text)
      return SCRUB_IR_COMMENT_RE.sub(r'', without_args)

    return normalize(self.extrascrub) == normalize(extrascrub)

  def __str__(self):
    return self.scrub
165
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
  """Record the scrubbed body of every function found in `raw_tool_output`.

  Args:
    function_re: compiled regex with named groups 'func' and 'body', and
      optionally 'args_and_sig' and 'analysis'.
    scrubber, scrubber_args: passed to do_scrub() to clean each body.
    raw_tool_output: the tool's output text to scan.
    prefixes: check prefixes the output belongs to; the last one is treated
      specially on conflicts (see below).
    func_dict: dict updated in place; func_dict[prefix][func] becomes a
      function_body, or None when runs under a shared prefix disagree.
      func_dict[prefix] must already exist for every prefix.
    verbose: when true, echo each function and its scrubbed body to stderr.
    record_args: when true (and the regex has 'args_and_sig'), store the
      scrubbed signature; otherwise store '(' or '' as described below.
  """
  for m in function_re.finditer(raw_tool_output):
    if not m:
      continue
    func = m.group('func')
    body = m.group('body')
    # Determine if we print arguments, the opening brace, or nothing after the function name
    if record_args and 'args_and_sig' in m.groupdict():
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
    elif 'args_and_sig' in m.groupdict():
        args_and_sig = '('
    else:
        args_and_sig = ''
    # Scrub the body twice: once normally and once with extra scrubbing; the
    # latter is used when two outputs differ only in argument names.
    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)
    if 'analysis' in m.groupdict():
      analysis = m.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))
    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for l in scrubbed_body.splitlines():
        print('  ' + l, file=sys.stderr)
    for prefix in prefixes:
      # An entry already exists for this function under this prefix and it
      # differs from the new output (in body or in signature).
      if func in func_dict[prefix] and (str(func_dict[prefix][func]) != scrubbed_body or (func_dict[prefix][func] and func_dict[prefix][func].args_and_sig != args_and_sig)):
        if func_dict[prefix][func] and func_dict[prefix][func].is_same_except_arg_names(scrubbed_extra, args_and_sig):
          # Only argument names differ: keep the entry but switch it to the
          # extra-scrubbed body and the new signature.
          func_dict[prefix][func].scrub = scrubbed_extra
          func_dict[prefix][func].args_and_sig = args_and_sig
          continue
        else:
          if prefix == prefixes[-1]:
            # Conflict on the last prefix: warn, then fall through so the
            # new body overwrites the entry below.
            warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
          else:
            # Conflict on a non-final prefix: mark the entry as unusable.
            func_dict[prefix][func] = None
            continue

      func_dict[prefix][func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)
206
207##### Generator of LLVM IR CHECK lines
208
# Matches an IR comment (';' to the end of the line) together with any
# whitespace immediately before it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers, but only if they are followed by
# spaces, commas, parens, or the end of the string.
# Groups: 1 = leading whitespace, 2 = the name without its '%', 3 = the
# delimiter that follows the name.
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.\-]+?)([,\s\(\)]|\Z)')
214
def get_value_name(var):
  """Create a FileCheck variable name based on the IR value name `var`.

  All-digit names get a 'TMP' prefix, '.' and '-' are replaced by '_', and the
  result is upper-cased.
  """
  name = 'TMP' + var if var.isdigit() else var
  return name.replace('.', '_').replace('-', '_').upper()
222
223
def get_value_definition(var):
  """Return the FileCheck text that defines the variable for IR value `var`.

  The variable is defined with the regex '%.*'.
  """
  return ''.join(('[[', get_value_name(var), ':%.*]]'))
227
228
def get_value_use(var):
  """Return the FileCheck text that uses the variable for IR value `var`."""
  return ''.join(('[[', get_value_name(var), ']]'))
232
def genericize_check_lines(lines, is_analyze):
  """Replace IR value defs and uses in `lines` with FileCheck variables.

  `lines` is rewritten in place and also returned. On every line '%.' is
  first replaced by '%dot' and IR comments are removed. Unless `is_analyze`
  is true, each IR value is then replaced: its first occurrence becomes a
  FileCheck variable definition and later occurrences become uses.
  """
  known_vars = set()

  def to_filecheck_var(match):
    # Called for each value on a line: values we haven't seen become defs,
    # values we have seen become uses.
    leading, name, trailing = match.group(1), match.group(2), match.group(3)
    if name in known_vars:
      replacement = get_value_use(name)
    else:
      known_vars.add(name)
      replacement = get_value_definition(name)
    # re.sub replaces the entire match, so hand back the surrounding
    # whitespace and delimiter as well.
    return leading + replacement + trailing

  for idx, text in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    text = text.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    text = SCRUB_IR_COMMENT_RE.sub(r'', text)
    if not is_analyze:
      text = IR_VALUE_RE.sub(to_filecheck_var, text)
    lines[idx] = text
  return lines
263  return lines
264
265
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for `func_name` to `output_lines`.

  Args:
    output_lines: list of output lines, appended to in place.
    comment_marker: comment leader placed at the start of every check line.
    prefix_list: sequence whose items carry a list of check prefixes at
      index 0 (one item per RUN line -- presumably; confirm at call sites).
    func_dict: func_dict[prefix][func_name] holds the recorded function_body
      (or a false value when none is usable).
    func_name: the function to emit checks for.
    check_label_format: %-format string taking (prefix, func_name,
      args_and_sig) that produces the -LABEL line.
    is_asm: emit asm-style checks (no variable substitution, -EMPTY for
      blank lines) instead of IR-style checks.
    is_analyze: passed to genericize_check_lines(); when true, IR values are
      not replaced by FileCheck variables.

  For each item of prefix_list, checks are emitted for the first prefix that
  has a usable body for the function; an already printed prefix ends the
  search for that item.
  """
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      # This prefix's checks were already emitted: nothing more to do for
      # this item.
      if checkprefix in printed_prefixes:
        break
      # TODO func_dict[checkprefix] may be None, '' or not exist.
      # Fix the call sites.
      if func_name not in func_dict[checkprefix] or not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      printed_prefixes.append(checkprefix)
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = genericize_check_lines([args_and_sig], is_analyze)[0]
      # If the signature contains FileCheck variables, put it on a separate
      # -SAME line rather than inside the -LABEL line.
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        # NOTE(review): func_body[0] raises IndexError if the recorded body
        # is empty -- confirm callers never record an empty body.
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = genericize_check_lines(func_body, is_analyze)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      # True while the previous body line was blank; the first body line is
      # therefore emitted as -NEXT, directly after the label.
      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them: the line following a
        # blank line gets a plain check instead of -NEXT.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT:  {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      break
340      break
341
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig):
  """Append IR-style check lines for `func_name` to `output_lines`.

  With `function_sig` set, the label line also matches the 'define ...' text
  before the function name. `preserve_names` is forwarded to add_checks() as
  its is_analyze argument, which disables the replacement of IR value names
  by FileCheck variables.
  """
  # Label format is based on IR string.
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  else:
    function_def_regex = ''
  check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=preserve_names)
348             check_label_format, False, preserve_names)
349
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Append check lines for analysis output of `func_name` to `output_lines`.

  The label line quotes the function name, and add_checks() is called in
  non-asm mode with is_analyze set, so IR value names are kept as they are.
  """
  check_label_format = "{} %s-LABEL: '%s%s'".format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=True)
352  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True)
353
354
def check_prefix(prefix):
  """Warn if `prefix` is not a valid FileCheck check prefix.

  A valid prefix consists only of alphanumeric characters, hyphens and
  underscores (PREFIX_RE). When the invalid prefix contains a comma, the
  warning also suggests the --check-prefixes= spelling. Returns None.
  """
  if PREFIX_RE.match(prefix):
    return
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only "
             "alphanumeric characters, hyphens and underscores." % (prefix,))
  if ',' in prefix:
    # Append the hint after formatting: it embeds the raw prefix, and a '%'
    # in it must not be interpreted as a conversion specifier.
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
361             (prefix))
362
363
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes given in the FileCheck command `fc_cmd`.

  Each whitespace-separated token containing 'check-prefix=' or
  'check-prefixes=' has its value(s) validated with check_prefix(); for the
  comma-separated plural form, a warning is also issued for every entry that
  occurs more than once in the list.
  """
  for token in fc_cmd.split():
    if "check-prefix=" in token:
      check_prefix(token.split('=', 1)[1])
      continue
    if "check-prefixes=" not in token:
      continue
    names = token.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
375          warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))
376