xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision a8a89c77ea3c16b45763fca6940bbfd3bef7884f)
1from __future__ import print_function
2import re
3import string
4import subprocess
5import sys
6import copy
7
# Python 3 dropped string.expandtabs(); provide a tiny stand-in namespace so
# callers can keep writing string.expandtabs(text, tabsize) on both versions.
if sys.version_info[0] <= 2:
  import string
else:
  class string:
    expandtabs = str.expandtabs
13
14##### Common utilities for update_*test_checks.py
15
def should_add_line_to_output(input_line, prefix_set):
  """Return True if `input_line` of the test file should be kept in the output.

  Dropped lines are:
    * comment lines that contain nothing but ';', and
    * existing check lines whose prefix is in `prefix_set` (they are
      regenerated by the caller).
  Blank lines are deliberately kept.
  """
  if input_line.strip() == ';':
    return False

  # CHECK_RE captures the check prefix in group 1.
  check_match = CHECK_RE.match(input_line)
  is_stale_check = bool(check_match) and check_match.group(1) in prefix_set
  return not is_stale_check
29
def invoke_tool(exe, cmd_args, ir):
  """Run the tool under test with the file `ir` as stdin; return its stdout.

  `cmd_args` is either a list of arguments (executed directly) or a single
  string (executed through the shell as `exe + ' ' + cmd_args`).
  subprocess.check_output raises CalledProcessError on a non-zero exit code.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    if not isinstance(cmd_args, list):
      raw_output = subprocess.check_output(exe + ' ' + cmd_args,
                                           shell=True, stdin=ir_file)
    else:
      raw_output = subprocess.check_output([exe] + cmd_args, stdin=ir_file)
    # check_output returns bytes on Python 3; convert to text.
    if sys.version_info[0] > 2:
      raw_output = raw_output.decode()
  # Normalize Windows CRLF line endings to unix LF.
  return raw_output.replace('\r\n', '\n')
45
46##### LLVM IR parser
47
# A 'RUN:' line inside a ';' or '#' comment; group 1 is the command text.
RUN_LINE_RE = re.compile(r'^\s*[;#]\s*RUN:\s*(.*)$')
# A -check-prefix / --check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A prefix made only of alphanumerics, underscores and hyphens.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# A check line inside a ';' or '#' comment; group 1 is the prefix with any
# -NEXT/-NOT/-DAG/-LABEL/-SAME suffix removed.
CHECK_RE = re.compile(r'^\s*[;#]\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME)?:')

# A full 'define ... @name(args) ... {' / body / '}' function definition.
# Named groups: func, args_and_sig, body.
OPT_FUNCTION_RE = re.compile(
    r'^\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w\.\-]+?)\))[^{]*)\{\n(?P<body>.*?)^\}$',
    flags=re.M | re.S)

# A "'<analysis>' for function '<func>':" header followed by its body.
# Named groups: analysis, func, body.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=re.X | re.S)

# The start of a function definition; group 1 is the function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@(\w+)\s*\(')
# A 'target triple = "..."' line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march command line options; group 1 is the value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Whitespace scrubbers.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# '# kill:' comment lines and loop annotation comments.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
74
75
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr, with ': <test_file>' appended if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
80
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr, with ': <test_file>' appended if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
85
def scrub_body(body):
  """Normalize the whitespace of `body` and return the result.

  Steps, in order: collapse whitespace runs matched by SCRUB_WHITESPACE_RE
  into one space (leading whitespace is left in place), expand tabs to a
  tab size of 2, then strip trailing whitespace from every line.
  """
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  detabbed = string.expandtabs(collapsed, 2)
  return SCRUB_TRAILING_WHITESPACE_RE.sub(r'', detabbed)
95
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber(body, *scrubber_args)` with `extra_scrub` set to `extra`.

  The flag is set on the first element of a deep copy of `scrubber_args`, so
  the caller's argument objects are never modified. When `scrubber_args` is
  empty there is nothing to carry the flag and the scrubber is called as-is.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
102
103# Build up a dictionary of all the function bodies.
class function_body(object):
  """Scrubbed output recorded for one function.

  Attributes:
    scrub: the scrubbed body; this is what str() returns.
    extrascrub: the body scrubbed with the extra-scrub flag enabled.
    args_and_sig: the text stored after the function name.
  """

  def __init__(self, string, extra, args_and_sig):
    self.scrub = string
    self.extrascrub = extra
    self.args_and_sig = args_and_sig

  def is_same_except_arg_names(self, extrascrub, args_and_sig):
    """Return True if this entry and the given one differ only in arg names.

    The signatures must be equal once every IR value name is removed, and the
    extra-scrubbed bodies must be equal once the names seen in either
    signature are removed and IR comments are stripped.
    """
    signature_names = set()

    def strip_and_record(match):
      # Remember the name, then drop the whole '%name' token.
      signature_names.add(match.group(2))
      return match.group(1) + match.group(3)

    def strip_if_recorded(match):
      # Names from the signatures are removed; any other name is kept
      # (re-emitted without its '%' sigil, identically on both sides).
      kept = '' if match.group(2) in signature_names else match.group(2)
      return match.group(1) + kept + match.group(3)

    def normalize(text):
      return SCRUB_IR_COMMENT_RE.sub(r'', IR_VALUE_RE.sub(strip_if_recorded, text))

    # Both substitutions must run before any body is normalized so that
    # signature_names holds the names from both signatures.
    own_sig = IR_VALUE_RE.sub(strip_and_record, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(strip_and_record, args_and_sig)
    if own_sig != other_sig:
      return False
    return normalize(self.extrascrub) == normalize(extrascrub)

  def __str__(self):
    return self.scrub
130
def build_function_body_dictionary(function_re, scrubber, scrubber_args, raw_tool_output, prefixes, func_dict, verbose, record_args):
  """Record the scrubbed body of every function found in `raw_tool_output`.

  Each match of `function_re` (which must define the groups 'func' and 'body',
  and optionally 'args_and_sig' and 'analysis') is scrubbed and stored as a
  function_body in func_dict[prefix][func] for every prefix in `prefixes`.

  If an entry already exists and differs, it is updated in place when the two
  differ only in argument names. Otherwise the entry is set to None, except
  for the last prefix, where a warning is printed and the entry is replaced.
  """
  for match in function_re.finditer(raw_tool_output):
    func = match.group('func')
    body = match.group('body')

    # Determine if we print arguments, the opening brace, or nothing after
    # the function name.
    if 'args_and_sig' not in match.groupdict():
      args_and_sig = ''
    elif record_args:
      args_and_sig = scrub_body(match.group('args_and_sig').strip())
    else:
      args_and_sig = '('

    scrubbed_body = do_scrub(body, scrubber, scrubber_args, extra = False)
    scrubbed_extra = do_scrub(body, scrubber, scrubber_args, extra = True)

    if 'analysis' in match.groupdict():
      analysis = match.group('analysis')
      if analysis.lower() != 'cost model analysis':
        warn('Unsupported analysis mode: %r!' % (analysis,))

    if func.startswith('stress'):
      # We only use the last line of the function body for stress tests.
      scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])

    if verbose:
      print('Processing function: ' + func, file=sys.stderr)
      for body_line in scrubbed_body.splitlines():
        print('  ' + body_line, file=sys.stderr)

    for prefix in prefixes:
      entries = func_dict[prefix]
      if func in entries:
        # `existing` is None when an earlier conflict was already recorded.
        existing = entries[func]
        conflicts = (str(existing) != scrubbed_body or
                     (existing and existing.args_and_sig != args_and_sig))
        if conflicts:
          if existing and existing.is_same_except_arg_names(scrubbed_extra, args_and_sig):
            existing.scrub = scrubbed_extra
            existing.args_and_sig = args_and_sig
            continue
          if prefix != prefixes[-1]:
            entries[func] = None
            continue
          # Last prefix: warn, then fall through and overwrite the entry.
          warn('Found conflicting asm under the same prefix: %r!' % (prefix,))

      entries[func] = function_body(scrubbed_body, scrubbed_extra, args_and_sig)
171
172##### Generator of LLVM IR CHECK lines
173
# An IR comment: optional whitespace, a ';', and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')

# Match things that look like identifiers ('%name' preceded by whitespace),
# but only if they are followed by a space, comma, paren, or the end of the
# string. Groups: (1) leading whitespace, (2) name without '%', (3) the
# character that follows the name.
IR_VALUE_RE = re.compile(r'(\s+)%([\w\.\-]+?)([,\s\(\)]|\Z)')
179
def get_value_name(var):
  """Create a FileCheck variable name based on the IR name `var`.

  Purely numeric names get a 'TMP' prefix, '.' and '-' become '_', and the
  result is upper-cased.
  """
  name = 'TMP' + var if var.isdigit() else var
  return name.replace('.', '_').replace('-', '_').upper()
187
188
def get_value_definition(var):
  """Return the FileCheck pattern that defines the variable for IR name `var`."""
  return ''.join(['[[', get_value_name(var), ':%.*]]'])
192
193
def get_value_use(var):
  """Return the FileCheck expression that uses the variable for IR name `var`."""
  return ''.join(['[[', get_value_name(var), ']]'])
197
def genericize_check_lines(lines, is_analyze):
  """Replace IR value defs and uses in `lines` with FileCheck variables.

  `lines` is modified in place and also returned. IR comments are stripped
  from every line. When `is_analyze` is true the value names are left
  untouched and only the comments are removed.
  """
  # Names already turned into a definition; shared across all lines so a
  # value defined on one line is emitted as a use on later lines.
  vars_seen = set()

  # This gets called for each match that occurs in a line. We transform
  # variables we haven't seen into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = match.group(2)
    if var in vars_seen:
      rv = get_value_use(var)
    else:
      vars_seen.add(var)
      rv = get_value_definition(var)
    # re.sub replaces the entire regex match with whatever you return, so we
    # have to make sure to hand it back everything including the commas and
    # spaces.
    return match.group(1) + rv + match.group(3)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    if is_analyze:
      lines[i] = scrubbed_line
    else:
      lines[i] = IR_VALUE_RE.sub(transform_line_vars, scrubbed_line)
  return lines
229
230
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze):
  """Append the check lines for `func_name` to `output_lines`.

  For each entry of `prefix_list`, the first prefix in entry[0] that has a
  recorded (truthy) body for `func_name` in `func_dict` is used; a prefix is
  never emitted twice. `check_label_format` is a %-format string that takes
  (prefix, function name, signature text). For asm the body lines are emitted
  verbatim; otherwise they are genericized first unless `is_analyze` is set.
  """
  emitted_prefixes = []
  for prefix_entry in prefix_list:
    for checkprefix in prefix_entry[0]:
      if checkprefix in emitted_prefixes:
        break
      # TODO func_dict[checkprefix] may be None, '' or not exist.
      # Fix the call sites.
      recorded = func_dict[checkprefix]
      if func_name not in recorded or not recorded[func_name]:
        continue
      func_info = recorded[func_name]

      # Add some space between different check prefixes, but not after the
      # last check line (before the test code).
      if is_asm and emitted_prefixes:
        output_lines.append(comment_marker)

      emitted_prefixes.append(checkprefix)

      # A signature that contains FileCheck variables goes on its own -SAME
      # line; otherwise it is part of the label line.
      signature = genericize_check_lines([str(func_info.args_and_sig)], is_analyze)[0]
      if '[[' in signature:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, signature))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, signature))

      body_lines = str(func_info).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, body_lines[0]))
        for asm_line in body_lines[1:]:
          if asm_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, asm_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      body_lines = genericize_check_lines(body_lines, is_analyze)

      # Blank lines are skipped rather than checked; the first line after a
      # blank one gets a plain check instead of a -NEXT check.
      after_blank = False
      for ir_line in body_lines:
        if ir_line.strip() == '':
          after_blank = True
          continue
        # Do not waste time checking IR comments.
        ir_line = SCRUB_IR_COMMENT_RE.sub(r'', ir_line)
        line_format = '{} {}:       {}' if after_blank else '{} {}-NEXT:  {}'
        output_lines.append(line_format.format(comment_marker, checkprefix, ir_line))
        after_blank = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)
      break
306
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names):
  """Append IR check lines for `func_name` to `output_lines`.

  The label line matches 'define <anything but @>@<func_name>'. When
  `preserve_names` is true, IR value names are not replaced by FileCheck
  variables (it is forwarded as add_checks' is_analyze flag).
  """
  # Label format is based on IR string.
  check_label_format = '{} %s-LABEL: {}@%s%s'.format(
      comment_marker, 'define {{[^@]+}}')
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=preserve_names)
314
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Append analysis check lines for `func_name` to `output_lines`.

  The label line matches the function name inside single quotes; value names
  are left as they are (is_analyze is always true).
  """
  label_template = '{} %s-LABEL: \'%s%s\''
  check_label_format = label_template.format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, is_asm=False, is_analyze=True)
318
319
def check_prefix(prefix):
  """Warn if `prefix` is not a valid FileCheck prefix.

  A valid prefix matches PREFIX_RE (alphanumeric characters, hyphens and
  underscores). If the prefix contains a comma, the warning also suggests
  using '--check-prefixes='. Nothing is returned.
  """
  if PREFIX_RE.match(prefix):
    return
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only "
             "alphanumeric characters, hyphens and underscores." % (prefix,))
  if ',' in prefix:
    # Append the hint after formatting: it embeds the user-supplied prefix,
    # and a '%' in it must not be interpreted as a format directive.
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
327
328
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes given on the FileCheck command line `fc_cmd`.

  Every value of a 'check-prefix=' or 'check-prefixes=' option is passed to
  check_prefix(). For 'check-prefixes=', a warning is also printed for each
  listed prefix that occurs more than once.
  """
  for part in fc_cmd.split():
    if "check-prefix=" in part:
      check_prefix(part.split('=', 1)[1])
      continue
    if "check-prefixes=" not in part:
      continue
    prefixes = part.split('=', 1)[1].split(',')
    for prefix in prefixes:
      check_prefix(prefix)
      if prefixes.count(prefix) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (prefix,))
341