xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 7ad55a3df51a0d8c904fec3f52117932c23f0b01)
1from __future__ import print_function
2
3import copy
4import glob
5import re
6import subprocess
7import sys
8
# This file calls string.expandtabs(text, tabsize). On Python 2 the real
# `string` module is used; on newer interpreters a minimal stand-in class
# exposing only that helper is defined instead.
if sys.version_info[0] <= 2:
  import string
else:
  class string:
    expandtabs = str.expandtabs
14
15##### Common utilities for update_*test_checks.py
16
17
# Module-wide verbosity switch: assigned by parse_commandline_args() from the
# --verbose option and consulted by debug() before printing anything.
_verbose = False
19
def parse_commandline_args(parser):
  """Add the options common to the update scripts to `parser` and parse.

  The command line is parsed with `parser.parse_args()`. As a side effect the
  module-level `_verbose` flag is set from the parsed --verbose option.

  Returns the parsed argument namespace.
  """
  global _verbose
  # (option strings, add_argument keyword arguments), in registration order.
  common_options = (
      (('--include-generated-funcs',),
       dict(action='store_true',
            help='Output checks for functions not in source')),
      (('-v', '--verbose'),
       dict(action='store_true',
            help='Show verbose output')),
      (('-u', '--update-only'),
       dict(action='store_true',
            help='Only update test if it was already autogened')),
      (('--force-update',),
       dict(action='store_true',
            help='Update test even if it was autogened by a different script')),
      # --enable and --disable share one destination that defaults to True.
      (('--enable',),
       dict(action='store_true', dest='enabled', default=True,
            help='Activate CHECK line generation from this point forward')),
      (('--disable',),
       dict(action='store_false', dest='enabled',
            help='Deactivate CHECK line generation from this point forward')),
      (('--replace-function-regex',),
       dict(nargs='+', default=[],
            help='List of regular expressions to replace matching function names')),
  )
  for option_strings, settings in common_options:
    parser.add_argument(*option_strings, **settings)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
39
40
class InputLineInfo(object):
  """Plain record for one input line of a test file.

  Holds the line text, its number, and the `args`/`argv` values that were
  supplied for that line.
  """

  def __init__(self, line, line_number, args, argv):
    self.line, self.line_number = line, line_number
    self.args, self.argv = args, argv
47
48
class TestInfo(object):
  """State for one test file being (re)generated.

  Keeps the file path, its lines, the RUN lines found in it, the current
  argument namespace/argv, and the autogeneration note written as the first
  output line.
  """

  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix, argparse_callback):
    self.parser = parser
    self.argparse_callback = argparse_callback
    self.path = test
    self.args = args
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    # Without an explicit prefix, .mir files use '#' and everything else ';'.
    if comment_prefix is None:
      comment_prefix = '#' if self.path.endswith('.mir') else ';'
    self.comment_prefix = comment_prefix
    self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
    note = self.autogenerated_note_prefix + script_name
    note += get_autogennote_suffix(parser, self.args)
    self.test_autogenerated_note = note

  def ro_iterlines(self):
    """Yield an InputLineInfo per input line without touching any output.

    Each line is passed through check_for_command() together with the
    current `self.args`/`self.argv`; the values it returns are attached to
    the yielded record.
    """
    for number, text in enumerate(self.input_lines):
      line_args, line_argv = check_for_command(
          text, self.parser, self.args, self.argv, self.argparse_callback)
      yield InputLineInfo(text, number, line_args, line_argv)

  def iterlines(self, output_lines):
    """Yield the lines for which check generation is enabled.

    The autogeneration note is appended to `output_lines` first. Lines that
    start with the note prefix are dropped. For every other line the current
    args/argv are updated from the line's record; when `args.enabled` is
    false the line is copied to `output_lines` unchanged instead of being
    yielded.
    """
    output_lines.append(self.test_autogenerated_note)
    for info in self.ro_iterlines():
      text = info.line
      # Discard any previous script advertising.
      if text.startswith(self.autogenerated_note_prefix):
        continue
      self.args, self.argv = info.args, info.argv
      if self.args.enabled:
        yield info
      else:
        output_lines.append(text)
88
def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
  """Yield a TestInfo for every test file matching `test_patterns`.

  Each pattern is expanded with glob; patterns matching nothing are reported
  with a warning and skipped. For each file the command line is re-parsed
  with `parser` (and handed to `argparse_callback` when given). Files whose
  first line carries the autogeneration note of a different script are
  skipped unless --force-update is set; files without any note are skipped
  when --update-only is set.
  """
  for pattern in test_patterns:
    # On Windows we must expand the patterns ourselves.
    matched_paths = glob.glob(pattern)
    if not matched_paths:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for test in matched_paths:
      with open(test) as test_file:
        input_lines = [raw.rstrip() for raw in test_file]
      args = parser.parse_args()
      if argparse_callback is not None:
        argparse_callback(args)
      argv = sys.argv[:]
      first_line = input_lines[0] if input_lines else ""
      if UTC_ADVERT in first_line:
        foreign_note = script_name not in first_line
        if foreign_note and not args.force_update:
          warn("Skipping test which wasn't autogenerated by " + script_name, test)
          continue
        # The note may carry per-file arguments; let them refine args/argv.
        args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
      elif args.update_only:
        assert UTC_ADVERT not in first_line
        warn("Skipping test which isn't autogenerated: " + test)
        continue
      yield TestInfo(test, parser, script_name, input_lines, args, argv,
                     comment_prefix, argparse_callback)
115
116
def should_add_line_to_output(input_line, prefix_set, skip_global_checks = False):
  """Return True when `input_line` should be copied to the regenerated test.

  input_line: one line of the existing test file.
  prefix_set: check prefixes whose CHECK lines are being regenerated.
  skip_global_checks: when true, blank ';' comment lines are kept, and a
    CHECK line using one of the prefixes is dropped only if it contains
    '[['.

  Blank lines are deliberately kept.
  """
  stripped = input_line.strip()
  # Skip any blank comment lines in the IR.
  if not skip_global_checks and stripped == ';':
    return False
  # Skip a special double comment line we use as a separator.
  if stripped == SEPARATOR:
    return False
  # And skip any CHECK lines. We're building our own.
  m = CHECK_RE.match(input_line)
  if m and m.group(1) in prefix_set:
    if skip_global_checks:
      # A plain substring test replaces the former per-call
      # re.compile('\[\['), whose non-raw pattern used an invalid escape
      # sequence.
      return '[[' not in input_line
    return False

  return True
136
137# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir):
  """Run `exe` with `cmd_args`, feeding it the file `ir` on stdin.

  cmd_args: either a list of arguments (run without a shell) or a single
    string that is appended to `exe` and run through the shell.

  Returns the tool's stdout with '\\r\\n' line endings converted to '\\n'.
  subprocess.check_output raises if the tool exits with a non-zero status.
  """
  # TODO Remove the str form which is used by update_test_checks.py and
  # update_llc_test_checks.py
  # The safer list form is used by update_cc_test_checks.py
  use_shell = not isinstance(cmd_args, list)
  if use_shell:
    command = exe + ' ' + cmd_args
  else:
    command = [exe] + cmd_args
  with open(ir) as ir_file:
    stdout = subprocess.check_output(command, shell=use_shell, stdin=ir_file)
  if sys.version_info[0] > 2:
    stdout = stdout.decode()
  # Normalize Windows CRLF line endings to unix LF.
  return stdout.replace('\r\n', '\n')
152
153##### LLVM IR parser
# A RUN line: optional indentation, a '//', ';' or '#' comment marker, then
# 'RUN:'. Group 1 is the command text.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# -check-prefix / --check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# The characters accepted in a check prefix.
PREFIX_RE = re.compile(r'^[a-zA-Z0-9_-]+$')
# An existing check line; group 1 is the prefix with any -NEXT/-NOT/-DAG/
# -LABEL/-SAME/-EMPTY suffix removed.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
# Group 'cmd' is the text following UTC_ARGS_KEY. The tail of the pattern is
# a raw string so that '\s' is not an invalid string escape sequence.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '

# A function definition in printed IR. Named groups: 'attrs' (optional
# "Function Attrs:" comment), 'func', 'args_and_sig' and 'body'.
OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Output of the form "'<analysis>' for function '<func>':" followed by a body.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# Start of an IR 'define'; group 1 is the (optionally quoted) function name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)

# Separator line between generated check groups; such lines are dropped from
# the input by should_add_line_to_output().
SEPARATOR = ';.'
189
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr, with ': <test_file>' appended when given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
194
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr, with ': <test_file>' appended when given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
199
def debug(*args, **kwargs):
  """print() the arguments when the module-level `_verbose` flag is set.

  Output goes to stderr unless the caller passes its own `file` keyword.
  """
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  if _verbose:
    print(*args, **kwargs)
206
def find_run_lines(test, lines):
  """Collect the RUN commands found in `lines`.

  Every line matching RUN_LINE_RE contributes its command text. A command
  ending in a backslash is joined (backslashes stripped, one space inserted)
  with the text of the next RUN line. `test` is only used in debug output.

  Returns the list of joined RUN commands.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for line in lines:
    found = RUN_LINE_RE.match(line)
    if not found:
      continue
    fragment = found.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      # Continuation of the previous RUN line.
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + fragment
    else:
      run_lines.append(fragment)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for command in run_lines:
    debug('  RUN: {}'.format(command))
  return run_lines
221
def scrub_body(body):
  """Normalize whitespace in `body` and return the result.

  Applies, in order: SCRUB_WHITESPACE_RE (replacing each match with a single
  space), tab expansion with a tab size of 2, and removal of trailing
  whitespace via SCRUB_TRAILING_WHITESPACE_TEST_RE.
  """
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation.
  detabbed = string.expandtabs(collapsed, 2)
  # Strip trailing whitespace.
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', detabbed)
231
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber(body, *scrubber_args)` and return its result.

  When `scrubber_args` is non-empty the call uses a deep copy of it whose
  first element has `extra_scrub` set to `extra`, so the caller's arguments
  are left untouched.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  private_args = copy.deepcopy(scrubber_args)
  private_args[0].extra_scrub = extra
  return scrubber(body, *private_args)
238
239# Build up a dictionary of all the function bodies.
class function_body(object):
  """Scrubbed output recorded for one function.

  scrub: the scrubbed body text (also what str() returns).
  extrascrub: the body scrubbed with extra_scrub enabled.
  args_and_sig: the text printed after the function name.
  attrs: the function attributes text.
  """

  def __init__(self, string, extra, args_and_sig, attrs):
    self.scrub = string
    self.extrascrub = extra
    self.args_and_sig = args_and_sig
    self.attrs = attrs

  def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs):
    """Return True if the given output matches this one up to argument names.

    The attributes must be equal. The two signatures must be equal once the
    IR values matched in them are removed (a matched attribute reference is
    kept). The two extra-scrubbed bodies must be equal once uses of those
    collected names are dropped and IR comments are removed.
    """
    if self.attrs != attrs:
      return False

    arg_names = set()

    def drop_arg_names(match):
      # Remember the name seen in the signature, then remove the value,
      # keeping only a matched attribute reference.
      arg_names.add(match.group(variable_group_in_ir_value_match))
      attr = match.group(attribute_group_in_ir_value_match) or ''
      return match.group(1) + attr + match.group(match.lastindex)

    def repl_arg_names(match):
      name = match.group(variable_group_in_ir_value_match)
      if name is not None and name in arg_names:
        return match.group(1) + match.group(match.lastindex)
      return match.group(1) + match.group(2) + match.group(match.lastindex)

    own_sig = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
    if own_sig != other_sig:
      return False

    own_body = IR_VALUE_RE.sub(repl_arg_names, self.extrascrub)
    other_body = IR_VALUE_RE.sub(repl_arg_names, extrascrub)
    own_body = SCRUB_IR_COMMENT_RE.sub(r'', own_body)
    other_body = SCRUB_IR_COMMENT_RE.sub(r'', other_body)
    return own_body == other_body

  def __str__(self):
    return self.scrub
273
class FunctionTestBuilder:
  """Collects, per check prefix, the scrubbed output of every function found
  in the tool output of the processed RUN lines.

  `_func_dict[prefix]` maps a function name to a `function_body`, or to None
  once RUN lines sharing that prefix produced conflicting output for the
  function. `_func_order[prefix]` lists the names in the order they were
  recorded. `_global_var_dict[prefix]` is filled by
  build_global_values_dictionary().
  """

  # Matches the capture groups of a --replace-function-regex pattern in
  # leftmost order. Compiled once from a raw string; the former per-iteration
  # re.compile('\(.*?\)') used an invalid string escape sequence.
  _CAPTURE_GROUP_RE = re.compile(r'\(.*?\)')

  def __init__(self, run_list, flags, scrubber_args):
    """Create empty per-prefix tables.

    run_list: sequence of entries whose element 0 is the list of check
      prefixes of one RUN line.
    flags: parsed arguments; `verbose`, `function_signature`,
      `check_attributes` and `replace_function_regex` are read.
    scrubber_args: extra positional arguments handed to the scrubber through
      do_scrub().
    """
    self._verbose = flags.verbose
    self._record_args = flags.function_signature
    self._check_attributes = flags.check_attributes
    self._scrubber_args = scrubber_args
    # Strip double-quotes if input was read by UTC_ARGS
    self._replace_function_regex = [regex.strip('"') for regex in flags.replace_function_regex]
    self._func_dict = {}
    self._func_order = {}
    self._global_var_dict = {}
    for run_entry in run_list:
      for prefix in run_entry[0]:
        self._func_dict[prefix] = {}
        self._func_order[prefix] = []
        self._global_var_dict[prefix] = {}

  def finish_and_get_func_dict(self):
    """Warn about prefixes that are unusable for every function and return
    the prefix -> {function name -> function_body or None} table."""
    for prefix in self._get_failed_prefixes():
      warn('Prefix %s had conflicting output from different RUN lines for all functions' % (prefix,))
    return self._func_dict

  def func_order(self):
    return self._func_order

  def global_var_dict(self):
    return self._global_var_dict

  def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
    """Record the functions found in one RUN line's output under `prefixes`.

    function_re: compiled regex with named groups 'func' and 'body'; 'attrs'
      is read when attribute checking is on, and 'args_and_sig' / 'analysis'
      are used when the pattern defines them.
    scrubber: callable invoked through do_scrub() on each function body.
    raw_tool_output: the tool's complete output.
    prefixes: check prefixes of the RUN line that produced the output.
    """
    build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
    for m in function_re.finditer(raw_tool_output):
      func = m.group('func')
      body = m.group('body')
      attrs = m.group('attrs') if self._check_attributes else ''
      named_groups = m.groupdict()
      # Determine if we print arguments, the opening brace, or nothing after the
      # function name
      if self._record_args and 'args_and_sig' in named_groups:
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
      elif 'args_and_sig' in named_groups:
        args_and_sig = '('
      else:
        args_and_sig = ''
      scrubbed_body = do_scrub(body, scrubber, self._scrubber_args,
                               extra=False)
      scrubbed_extra = do_scrub(body, scrubber, self._scrubber_args,
                                extra=True)
      if 'analysis' in named_groups:
        analysis = m.group('analysis')
        if analysis.lower() != 'cost model analysis':
          warn('Unsupported analysis mode: %r!' % (analysis,))
      if func.startswith('stress'):
        # We only use the last line of the function body for stress tests.
        scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
      if self._verbose:
        print('Processing function: ' + func, file=sys.stderr)
        for l in scrubbed_body.splitlines():
          print('  ' + l, file=sys.stderr)
      for prefix in prefixes:
        if func in self._func_dict[prefix]:
          existing = self._func_dict[prefix][func]
          if (existing is None or
              str(existing) != scrubbed_body or
              existing.args_and_sig != args_and_sig or
              existing.attrs != attrs):
            if (existing is not None and
                existing.is_same_except_arg_names(
                    scrubbed_extra,
                    args_and_sig,
                    attrs)):
              # Only argument names differ: keep the entry, but switch it to
              # the extra-scrubbed body.
              existing.scrub = scrubbed_extra
              existing.args_and_sig = args_and_sig
              continue
            else:
              # This means a previous RUN line produced a body for this function
              # that is different from the one produced by this current RUN line,
              # so the body can't be common across RUN lines. We use None to
              # indicate that.
              self._func_dict[prefix][func] = None
              continue

        # Replace function names matching the regex.
        for regex in self._replace_function_regex:
          # Replace function name with regex.
          match = re.match(regex, func)
          if match:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = self._CAPTURE_GROUP_RE.sub(re.escape(g), func_repl, count=1)
            func = re.sub(func_repl, '{{' + func_repl + '}}', func)

          # Replace all calls to regex matching functions.
          matches = re.finditer(regex, scrubbed_body)
          for match in matches:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = self._CAPTURE_GROUP_RE.sub(re.escape(g), func_repl, count=1)
            # Substitute function call names that match the regex with the same
            # capture groups set.
            scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}', scrubbed_body)

        self._func_dict[prefix][func] = function_body(
            scrubbed_body, scrubbed_extra, args_and_sig, attrs)
        self._func_order[prefix].append(func)

  def _get_failed_prefixes(self):
    # This returns the list of those prefixes that failed to match any function,
    # because there were conflicting bodies produced by different RUN lines, in
    # all instances of the prefix. Effectively, this prefix is unused and should
    # be removed.
    for prefix in self._func_dict:
      if (self._func_dict[prefix] and
          (not [fct for fct in self._func_dict[prefix]
                if self._func_dict[prefix][fct] is not None])):
        yield prefix
393
394
395##### Generator of LLVM IR CHECK lines
396
# Matches an IR comment (from ';' to the end of the line) together with any
# whitespace directly in front of it.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
398
399# TODO: We should also derive check lines for global, debug, loop declarations, etc..
400
class NamelessValue:
    """Describes one kind of IR value that gets a FileCheck variable.

    An entry is matched either inline through `ir_prefix` + `ir_regexp`, or
    at the start of a line through `global_ir_prefix` +
    `global_ir_prefix_regexp`.
    """

    def __init__(self, check_prefix, check_key, ir_prefix, global_ir_prefix, global_ir_prefix_regexp,
                 ir_regexp, global_ir_rhs_regexp, is_before_functions):
        # Naming of the generated FileCheck variable and the key under which
        # seen values are tracked.
        self.check_prefix, self.check_key = check_prefix, check_key
        # Inline form: literal prefix plus the regexp for the value name.
        self.ir_prefix, self.ir_regexp = ir_prefix, ir_regexp
        # Start-of-line form: literal prefix plus the regexp for the name.
        self.global_ir_prefix = global_ir_prefix
        self.global_ir_prefix_regexp = global_ir_prefix_regexp
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
412
413# Description of the different "unnamed" values we match in the IR, e.g.,
414# (local) ssa values, (debug) metadata, etc.
# Columns, in NamelessValue.__init__ order:
#   check_prefix, check_key, ir_prefix, global_ir_prefix,
#   global_ir_prefix_regexp, ir_regexp, global_ir_rhs_regexp,
#   is_before_functions
# Every entry sets either (ir_prefix, ir_regexp) or (global_ir_prefix,
# global_ir_prefix_regexp), never both. The order matters: each entry adds
# one capture group to IR_VALUE_RE, and the entry's position is recovered
# from the index of the group that matched.
nameless_values = [
    NamelessValue(r'TMP'  , '%' , r'%'           , None            , None                   , r'[\w$.-]+?' , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , r'#'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , None           , r'attributes #' , r'[0-9]+'              , None         , r'{[^}]*}'           , False) ,
    NamelessValue(r'GLOB' , '@' , r'@'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'GLOB' , '@' , None           , r'@'            , r'[a-zA-Z0-9_$"\\.-]+' , None         , r'.+'                , True)  ,
    NamelessValue(r'DBG'  , '!' , r'!dbg '       , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'TBAA' , '!' , r'!tbaa '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'RNG'  , '!' , r'!range '     , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'LOOP' , '!' , r'!llvm.loop ' , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , r'metadata '   , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , None           , r''             , r'![0-9]+'             , None         , r'(?:distinct |)!.*' , False) ,
]
428
def createOrRegexp(old, new):
    """Join two regexp fragments with '|'.

    If `old` is empty or None, `new` is returned as is; otherwise, if `new`
    is empty or None, `old` is returned as is.
    """
    if old and new:
        return old + '|' + new
    return old if old else new
435
def createPrefixMatch(prefix_str, prefix_re):
    """Build '(?:<prefix_str>(<prefix_re>))': a non-capturing group holding
    the prefix followed by one capture group for the value.

    Returns '' when either argument is None.
    """
    if prefix_str is not None and prefix_re is not None:
        return ''.join(['(?:', prefix_str, '(', prefix_re, '))'])
    return ''
440
441# Build the regexp that matches an "IR value". This can be a local variable,
442# argument, global, or metadata, anything that is "named". It is important that
443# the PREFIX and SUFFIX below only contain a single group, if that changes
444# other locations will need adjustment as well.
# Group 1: whitespace in front of the value.
IR_VALUE_REGEXP_PREFIX = r'(\s*)'
# Alternation with one alternative (and exactly one capture group) per
# nameless_values entry, built in list order by the loop below.
IR_VALUE_REGEXP_STRING = r''
for nameless_value in nameless_values:
    lcl_match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    glb_match = createPrefixMatch(nameless_value.global_ir_prefix, nameless_value.global_ir_prefix_regexp)
    # Each entry must describe exactly one of the two forms.
    assert((lcl_match or glb_match) and not (lcl_match and glb_match))
    if lcl_match:
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, lcl_match)
    elif glb_match:
        # The start-of-line form is anchored with '^'.
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, '^' + glb_match)
# Last group: the character after the value (comma, whitespace or a
# parenthesis), or empty at the end of the string.
IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)'
IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
# Group 3 is the capture of the first nameless_values entry and group 4 that
# of the second entry.
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
465
466# Check a match for IR_VALUE_RE and inspect it to determine if it was a local
467# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
def get_idx_from_ir_value_match(match):
    """Return the nameless_values index of the kind that `match` captured.

    Scans the per-kind capture groups of an IR_VALUE_RE match (everything
    from the first nameless group up to, but excluding, the last matched
    group) for the first one that participated. If none did, an error is
    printed and 0 is returned.
    """
    first_group = first_nameless_group_in_ir_value_match
    for idx, group_number in enumerate(range(first_group, match.lastindex)):
        if match.group(group_number) is not None:
            return idx
    error("Unable to identify the kind of IR value from the match!")
    return 0
474
475# See get_idx_from_ir_value_match
def get_name_from_ir_value_match(match):
    """Return the text captured by the kind-specific group of `match`."""
    kind_idx = get_idx_from_ir_value_match(match)
    return match.group(kind_idx + first_nameless_group_in_ir_value_match)
478
# Return the nameless prefix we use for this kind of IR value, see also
# get_idx_from_ir_value_match
def get_nameless_check_prefix_from_ir_value_match(match):
    """Return the check_prefix of the nameless_values entry `match` hit."""
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    return kind.check_prefix
483
# Return the IR prefix and check prefix we use for this kind of IR value, e.g., (%, TMP) for locals,
# see also get_idx_from_ir_value_match
def get_ir_prefix_from_ir_value_match(match):
    """Return the pair (IR prefix, check prefix) for the kind `match` hit.

    The IR prefix is the entry's ir_prefix when that is set and the stripped
    matched text starts with it; otherwise it is the entry's
    global_ir_prefix.
    """
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    uses_local_prefix = bool(kind.ir_prefix) and match.group(0).strip().startswith(kind.ir_prefix)
    ir_prefix = kind.ir_prefix if uses_local_prefix else kind.global_ir_prefix
    return ir_prefix, kind.check_prefix
491
def get_check_key_from_ir_value_match(match):
    """Return the check_key of the nameless_values entry `match` hit."""
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    return kind.check_key
495
# Return the IR regexp we use for this kind of IR value, e.g., [0-9]+ for unnamed globals
# (locals are always matched with '.*'), see also get_idx_from_ir_value_match
def get_ir_prefix_from_ir_value_re_match(match):
    """Return the regexp placed in the FileCheck definition for this value.

    Locals always get '.*'. Other kinds get the entry's ir_regexp when the
    stripped matched text starts with the entry's ir_prefix, and its
    global_ir_prefix_regexp otherwise.
    """
    # for backwards compatibility we check locals with '.*'
    if is_local_def_ir_value_match(match):
        return '.*'
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    if kind.ir_prefix and match.group(0).strip().startswith(kind.ir_prefix):
        return kind.ir_regexp
    return kind.global_ir_prefix_regexp
506
507# Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
def is_local_def_ir_value_match(match):
    """Return True when the matched kind's ir_prefix is '%'."""
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    return kind.ir_prefix == '%'
510
511# Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'.
def is_global_scope_ir_value_match(match):
    """Return True when the matched kind defines a global_ir_prefix."""
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    return kind.global_ir_prefix is not None
514
515# Create a FileCheck variable name based on an IR name.
def get_value_name(var, check_prefix):
  """Turn an IR value name into a FileCheck variable name.

  '!' characters are removed; an all-digit name is prefixed with
  `check_prefix`; '.' and '-' become '_'; the result is upper-cased.
  """
  name = var.replace('!', '')
  if name.isdigit():
    name = check_prefix + name
  return name.replace('.', '_').replace('-', '_').upper()
523
524# Create a FileCheck variable from regex.
def get_value_definition(var, match):
  """Return the FileCheck text that defines a variable for `var`.

  For locals the IR prefix goes inside the brackets:
  '[[NAME:<prefix><regexp>]]'. For every other kind it stays outside:
  '<prefix>[[NAME:<regexp>]]'.
  """
  is_local = is_local_def_ir_value_match(match)
  name = get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match))
  ir_prefix = get_ir_prefix_from_ir_value_match(match)[0]
  value_re = get_ir_prefix_from_ir_value_re_match(match)
  # for backwards compatibility we check locals with '.*'
  if is_local:
    return '[[' + name + ':' + ir_prefix + value_re + ']]'
  return ir_prefix + '[[' + name + ':' + value_re + ']]'
532
533# Use a FileCheck variable.
def get_value_use(var, match, check_prefix):
  """Return the FileCheck text that uses the variable for `var`.

  Locals give '[[NAME]]'; other kinds put their IR prefix in front of it.
  """
  is_local = is_local_def_ir_value_match(match)
  reference = '[[' + get_value_name(var, check_prefix) + ']]'
  if is_local:
    return reference
  return get_ir_prefix_from_ir_value_match(match)[0] + reference
539
540# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
  """Replace IR values in `lines` with FileCheck variable defs and uses.

  lines: list of body lines; it is modified in place and also returned.
  is_analyze: when true, only the '%.' rename and comment stripping are
    applied and no variables are substituted.
  vars_seen: set of (name, check_key) pairs for locals already defined; new
    definitions are added to it.
  global_vars_seen: dict mapping (name, check_key) of non-local values to
    the check prefix they were defined with; new definitions are added.
  """
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    # NOTE(review): `pre` and `check` are not used below.
    pre, check = get_ir_prefix_from_ir_value_match(match)
    var = get_name_from_ir_value_match(match)
    # Warn when the IR name looks like a name this script would generate
    # (a check prefix followed by digits, compared case-insensitively).
    for nameless_value in nameless_values:
        if nameless_value.check_prefix and re.match(r'^' + nameless_value.check_prefix + r'[0-9]+?$', var, re.IGNORECASE):
            warn("Change IR value name '%s' to prevent possible conflict with scripted FileCheck name." % (var,))
    key = (var, get_check_key_from_ir_value_match(match))
    is_local_def = is_local_def_ir_value_match(match)
    if is_local_def and key in vars_seen:
      rv = get_value_use(var, match, get_nameless_check_prefix_from_ir_value_match(match))
    elif not is_local_def and key in global_vars_seen:
      rv = get_value_use(var, match, global_vars_seen[key])
    else:
      # First occurrence: record it and emit a definition.
      if is_local_def:
         vars_seen.add(key)
      else:
         global_vars_seen[key] = get_nameless_check_prefix_from_ir_value_match(match)
      rv = get_value_definition(var, match)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(match.lastindex)

  lines_with_def = []

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    lines[i] = scrubbed_line
    if not is_analyze:
      # It can happen that two matches are back-to-back and for some reason sub
      # will not replace both of them. For now we work around this by
      # substituting until there is no more match.
      changed = True
      while changed:
          # subn returns (new_text, substitution_count); a count of 0 ends
          # the loop.
          (lines[i], changed) = IR_VALUE_RE.subn(transform_line_vars, lines[i], count=1)
  return lines
585
586
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze, global_vars_seen_dict):
  """Append the check lines for `func_name` to `output_lines`.

  output_lines: list of output lines, extended in place.
  comment_marker: comment token put in front of every emitted line.
  prefix_list: one entry per RUN line; element 0 of an entry is its list of
    check prefixes.
  func_dict: prefix -> {function name -> recorded body, or a false value
    when there is no usable output}.
  check_label_format: %-format string taking (prefix, function name,
    args_and_sig) that produces the label line.
  is_asm: emit the body lines as they are, without substituting FileCheck
    variables.
  is_analyze: forwarded to generalize_check_lines().
  global_vars_seen_dict: prefix -> dict of global values already defined;
    values first seen while emitting a prefix are added to its dict.

  For each RUN line, checks are emitted for at most one prefix: the first
  one that is not excluded and has output. Processing of a RUN line also
  stops at the first prefix that has already been printed.
  """
  # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_exclusions = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
        prefix_exclusions |= set(checkprefixes)
        continue

  # prefix_exclusions is constructed, we can now emit the output
  for p in prefix_list:
    global_vars_seen = {}
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      # Start from the globals already defined under this prefix (and under
      # the earlier prefixes of this RUN line).
      if checkprefix in global_vars_seen_dict:
        global_vars_seen.update(global_vars_seen_dict[checkprefix])
      else:
        global_vars_seen_dict[checkprefix] = {}
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is excluded.
      if checkprefix in prefix_exclusions:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      if checkprefix not in global_vars_seen_dict:
          global_vars_seen_dict[checkprefix] = {}

      # Snapshot of the keys known so far, used at the end to find the
      # globals added while emitting this prefix.
      global_vars_seen_before = [key for key in global_vars_seen.keys()]

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      attrs = str(func_dict[checkprefix][func_name].attrs)
      attrs = '' if attrs == 'None' else attrs
      if attrs:
        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0]
      # A signature containing FileCheck variables goes on its own -SAME
      # line after the label.
      if '[[' in args_and_sig:
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them.
        # The first line after a blank one gets a plain check instead of
        # -NEXT.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT:  {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)

      # Remember new global variables we have not seen before
      for key in global_vars_seen:
          if key not in global_vars_seen_before:
              global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
      break
698
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig, global_vars_seen_dict):
  """Emit FileCheck lines for one IR function by delegating to add_checks.

  The label line has the shape '<comment_marker> %s-LABEL: <def>@%s%s'.
  <def> is the FileCheck regex 'define {{[^@]+}}' when function_sig is
  truthy and empty otherwise.  The three '%s' placeholders are left
  unexpanded here; add_checks receives the template and fills them in.
  """
  label_template = '{} %s-LABEL: {}@%s%s'
  if function_sig:
    # Match the whole 'define <ret-type/attrs> ' preamble up to the '@'.
    check_label_format = label_template.format(comment_marker,
                                               'define {{[^@]+}}')
  else:
    check_label_format = label_template.format(comment_marker, '')

  # The two flags after the label format are positional; presumably they are
  # add_checks' is_asm and is_analyze parameters -- confirm against add_checks.
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, False, preserve_names, global_vars_seen_dict)
706
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Emit FileCheck lines for one function of analysis output via add_checks.

  The label line quotes the function name:
  "<comment_marker> %s-LABEL: '%s%s'".  A fresh, empty global-variable
  dictionary is handed to add_checks on every call, so nothing recorded
  there is carried over between calls.
  """
  check_label_format = "{} %s-LABEL: '%s%s'".format(comment_marker)
  # Positional flags: False, True (presumably is_asm=False, is_analyze=True --
  # confirm against add_checks).
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format, False, True, {})
712
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
  """Record the global-value definition lines found in raw_tool_output.

  For every entry of nameless_values that has a global_ir_prefix, all lines
  of raw_tool_output matching '<prefix><prefix_regexp> = <rhs_regexp>' are
  collected and stored in glob_val_dict[prefix][entry.check_prefix] for each
  prefix in prefixes.

  Prefixes whose glob_val_dict entry is None are skipped.  When a prefix
  already holds different lines for the same kind of value, the entry is
  reset to None, except for the last prefix in prefixes, which gets a
  warning and is overwritten with the new lines.
  """
  for value_kind in nameless_values:
    ir_prefix = value_kind.global_ir_prefix
    if ir_prefix is None:
      continue

    # Whole-line match of '<lhs> = <rhs>'; re.M lets ^/$ anchor at every line.
    definition_re = re.compile(
        r'^' + ir_prefix + value_kind.global_ir_prefix_regexp
        + r'\s=\s' + value_kind.global_ir_rhs_regexp + r'$',
        flags=re.M)
    lines = [found.group(0) for found in definition_re.finditer(raw_tool_output)]

    kind_key = value_kind.check_prefix
    for prefix in prefixes:
      recorded = glob_val_dict[prefix]
      if recorded is None:
        continue
      if kind_key in recorded:
        if lines == recorded[kind_key]:
          # Identical output for this prefix: nothing to update.
          continue
        if prefix != prefixes[-1]:
          # Different run lines disagree; mark the shared prefix as unusable.
          recorded[kind_key] = None
          continue
        # NOTE(review): the message says "asm" although this path handles
        # global values; text kept verbatim.
        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
      recorded[kind_key] = lines
739
def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines, global_vars_seen_dict, is_analyze, is_before_functions):
  """Append check lines for recorded global values to output_lines.

  Only nameless_values entries that have a global_ir_prefix and whose
  is_before_functions flag equals the is_before_functions argument are
  considered.  For each such entry and each run in prefix_list, the first
  check prefix of the run that has recorded lines gets one
  '<comment_marker> <prefix>: <generalized line>' check per line, preceded
  by SEPARATOR.  A (prefix, kind) pair is printed at most once.  A final
  SEPARATOR is appended if anything was printed.

  global_vars_seen_dict is updated in place: every check prefix visited gets
  an entry, and variables first seen while generalizing are stored under
  the prefix they were printed for.
  """
  printed_prefixes = set()
  for value_kind in nameless_values:
    if (value_kind.global_ir_prefix is None
        or value_kind.is_before_functions != is_before_functions):
      continue

    kind_key = value_kind.check_prefix
    for run in prefix_list:
      # Variables known so far, accumulated over the prefixes of this run.
      global_vars_seen = {}
      for checkprefix in run[0]:
        if checkprefix in global_vars_seen_dict:
          global_vars_seen.update(global_vars_seen_dict[checkprefix])
        else:
          global_vars_seen_dict[checkprefix] = {}

        printed_key = (checkprefix, kind_key)
        if printed_key in printed_prefixes:
          # Already emitted for this prefix: done with this run.
          break

        recorded = glob_val_dict[checkprefix]
        if not recorded or kind_key not in recorded:
          continue
        global_lines = recorded[kind_key]
        if not global_lines:
          continue

        output_lines.append(SEPARATOR)

        # Snapshot the known names so new ones can be told apart afterwards.
        known_before = set(global_vars_seen)
        for line in global_lines:
          generalized = generalize_check_lines([line], is_analyze, set(),
                                               global_vars_seen)
          output_lines.append(
              '%s %s: %s' % (comment_marker, checkprefix, generalized[0]))
        printed_prefixes.add(printed_key)

        # Remember new global variables we have not seen before.
        remembered = global_vars_seen_dict[checkprefix]
        for key in global_vars_seen:
          if key not in known_before:
            remembered[key] = global_vars_seen[key]
        break

  if printed_prefixes:
    output_lines.append(SEPARATOR)
781
782
def check_prefix(prefix):
  """Warn when prefix does not match PREFIX_RE.

  If the rejected prefix contains a comma, the warning also suggests
  '--check-prefixes=<prefix>'.  Nothing is returned; a valid prefix produces
  no output.
  """
  if PREFIX_RE.match(prefix):
    return

  # Expand the '%s' first and append the hint afterwards.  The hint embeds
  # the raw prefix, so if it were part of the format string a '%' inside the
  # prefix would be parsed as a conversion and raise instead of warning.
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
790
791
def verify_filecheck_prefixes(fc_cmd):
  """Validate every check prefix named on a FileCheck command line.

  fc_cmd is split on whitespace.  A word containing 'check-prefix=' has the
  text after its first '=' validated with check_prefix.  A word containing
  'check-prefixes=' has that text split on ',' and each item validated; an
  item that occurs more than once in the list is additionally reported, once
  per occurrence.
  """
  for word in fc_cmd.split():
    if "check-prefix=" in word:
      check_prefix(word.split('=', 1)[1])
      continue
    if "check-prefixes=" not in word:
      continue

    names = word.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
804
805
def get_autogennote_suffix(parser, args):
  """Build the ' <UTC_ARGS_KEY> <options>' suffix describing non-default args.

  Walks the parser's registered actions and lists, by first option string,
  every option whose value in args differs from the parser default.
  Options that take a parameter are followed by their value; for
  nargs='+' options each item is wrapped in double quotes.  Returns the
  empty string when there is nothing to list.
  """
  # Parameters such as paths to the binary or the list of tests are ignored.
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  pieces = []
  for action in parser._actions:
    dest = action.dest
    # Options such as --help are not included in args at all.
    if not hasattr(args, dest) or dest in ignored_dests:
      continue

    value = getattr(args, dest)
    takes_parameter = action.const is None
    # For constant-storing actions (usually True/False), skip the ones whose
    # constant is not the current value; this happens with boolean
    # --foo/--no-foo pairs sharing one dest.
    if not takes_parameter and value != action.const:
      continue
    if parser.get_default(dest) == value:
      continue  # Default values are not recorded.

    pieces.append(action.option_strings[0])
    if takes_parameter:
      if action.nargs == '+':
        value = ' '.join('"' + item.strip('"') + '"' for item in value)
      pieces.append('%s' % value)

  if not pieces:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(pieces))
831
832
def check_for_command(line, parser, args, argv, argparse_callback):
  """Apply options embedded in line, if it matches UTC_ARGS_CMD.

  When the line matches, the space-separated options of its 'cmd' group are
  appended to argv (in place), the arguments are re-parsed with the entries
  of args.tests filtered out, and argparse_callback, if given, is invoked
  with the new namespace.  Returns the (possibly new) args together with
  argv; a non-matching line returns both unchanged.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if not cmd_m:
    return args, argv

  # Splitting on a single space can yield empty strings; drop them.
  argv.extend(opt for opt in cmd_m.group('cmd').strip().split(' ') if opt)
  args = parser.parse_args([arg for arg in argv if arg not in args.tests])
  if argparse_callback is not None:
    argparse_callback(args)
  return args, argv
843
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
  """Return the value of an option, optionally searching the whole test.

  get_arg_to_check extracts the option from an args namespace and is first
  applied to test_info.args.  If that is falsy and is_global is set, the
  per-line args yielded by test_info.ro_iterlines() are scanned and the
  first truthy value wins.  arg_string is only used in the warning text.

  A warning is printed to stderr when the option is found on or after the
  first line that is neither blank nor starts with ';'.  Returns the value
  found, or the last falsy value examined.
  """
  result = get_arg_to_check(test_info.args)
  if result or not is_global:
    return result

  # See if this has been specified via UTC_ARGS.  This is a "global" option
  # that affects the entire generation of test checks.  If it exists anywhere
  # in the test, apply it to everything.
  saw_line = False
  for line_info in test_info.ro_iterlines():
    line = line_info.line
    if not line.startswith(';') and line.strip() != '':
      saw_line = True
    result = get_arg_to_check(line_info.args)
    if not result:
      continue
    # The original also tested the truthiness of the module-level `warn`
    # function here, which is always true; only saw_line matters.
    if saw_line:
      # We saw the option after already reading some test input lines.
      # Warn about it.
      print('WARNING: Found {} in line following test start: '.format(arg_string)
            + line, file=sys.stderr)
      print('WARNING: Consider moving {} to top of file'.format(arg_string),
            file=sys.stderr)
    break
  return result
866
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
  """Copy the test's input lines to output_lines, dropping generated ones.

  A line is dropped when, stripped of surrounding whitespace, it equals
  comment_string or SEPARATOR, or when it is a comment line that CHECK_RE
  matches with a first group found in prefix_set.  All other lines are
  appended with trailing newlines removed.
  """
  for line_info in test_info.iterlines(output_lines):
    line = line_info.line
    stripped = line.strip()
    if stripped == comment_string or stripped == SEPARATOR:
      continue
    if line.lstrip().startswith(comment_string):
      check_m = CHECK_RE.match(line)
      if check_m is not None and check_m.group(1) in prefix_set:
        # An existing check line for one of our prefixes: drop it.
        continue
    output_lines.append(line.rstrip('\n'))
880
def add_checks_at_end(output_lines, prefix_list, func_order,
                      comment_string, check_generator):
  """Append checks for every function of every prefix, grouped by prefix.

  prefix_list holds (prefixes, tool_args) items.  For each prefix, the
  functions listed in func_order[prefix] are handed to check_generator one
  at a time.  comment_string is appended to output_lines before every
  function except the very first one processed.
  """
  added = set()
  for run in prefix_list:
    run_prefixes, tool_args = run[0], run[1]
    for single_prefix in run_prefixes:
      for func in func_order[single_prefix]:
        # Separate consecutive functions; nothing precedes the first one.
        if added:
          output_lines.append(comment_string)
        added.add(func)

        # The add_*_checks routines expect a run list whose items are tuples
        # of (list of prefixes, tool command args string) and emit checks
        # for every prefix in that list.  That implicitly assumes every run
        # line produces something for every function, which does not hold
        # for generated functions (e.g. -fopenmp vs. no -fopenmp).
        #
        # So pass a run list containing only the prefix of interest.  All
        # checks for one prefix are therefore generated before moving on to
        # the next, i.e. checks are ordered by prefix rather than by
        # function as in "normal" mode.
        check_generator(output_lines, [([single_prefix], tool_args)], func)
910