xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 5b39387fb8aa6f8b755b633736f3548e348d0ee6)
1from __future__ import print_function
2
3import argparse
4import copy
5import glob
6import itertools
7import os
8import re
9import subprocess
10import sys
11import shlex
12
13from typing import List
14
15##### Common utilities for update_*test_checks.py
16
17
# Verbosity switch: assigned by parse_commandline_args() from --verbose and
# consulted by debug() before it prints anything.
_verbose = False
# Prefix for FileCheck IR value names: assigned by TestInfo when
# --prefix-filecheck-ir-name is given, read by NamelessValue.get_value_name().
_prefix_filecheck_ir_name = ''
20
class Regex(object):
  """Thin wrapper around a compiled regular expression that can be deep-copied.

  do_scrub deep-copies its scrubber arguments; this wrapper's __deepcopy__
  hands the very same compiled pattern object to the copy instead of trying to
  duplicate it.
  """

  def __init__(self, regex):
    # `regex` is a compiled pattern object (e.g. the result of re.compile()).
    self.regex = regex

  def __deepcopy__(self, memo):
    # Shallow-copy the wrapper; the compiled pattern is shared, not copied.
    clone = copy.copy(self)
    clone.regex = self.regex
    return clone

  def search(self, line):
    """Search `line` with the wrapped pattern and return the result."""
    return self.regex.search(line)

  def sub(self, repl, line):
    """Return `line` with matches of the wrapped pattern replaced by `repl`."""
    return self.regex.sub(repl, line)

  def pattern(self):
    """Return the pattern the wrapped regex was compiled from."""
    return self.regex.pattern

  def flags(self):
    """Return the flags of the wrapped regex."""
    return self.regex.flags
45
class Filter(Regex):
  """A Regex that also records how a match affects the generated checks.

  When is_filter_out is false a matching line is added to the checks; when it
  is true a matching line is removed from them.
  """

  def __init__(self, regex, is_filter_out):
    super().__init__(regex)
    self.is_filter_out = is_filter_out

  def __deepcopy__(self, memo):
    # Let the Regex part copy itself (sharing the compiled pattern), then
    # carry the flag over to the copy.
    duplicate = copy.deepcopy(super(), memo)
    duplicate.is_filter_out = copy.deepcopy(self.is_filter_out, memo)
    return duplicate
59
def parse_commandline_args(parser):
  """Add the shared update-script options to `parser` and parse the command line.

  Args:
    parser: an argparse.ArgumentParser; the options below are registered on it
      and parser.parse_args() is then called.

  Returns:
    The namespace produced by parser.parse_args().

  Raises:
    ValueError: if a --filter / --filter-out value is not a valid regular
      expression.

  Side effects: stores the parsed verbose flag and the global-value regex lists
  in the module globals _verbose, _global_value_regex and
  _global_hex_value_regex.
  """
  class RegexAction(argparse.Action):
    """Add a regular expression option value to a list of regular expressions.
    This compiles the expression, wraps it in a Regex and adds it to the option
    value list."""
    def __init__(self, option_strings, dest, nargs=None, **kwargs):
      if nargs is not None:
        raise ValueError('nargs not allowed')
      super(RegexAction, self).__init__(option_strings, dest, **kwargs)

    def do_call(self, namespace, values, flags, option_string=None):
      value_list = getattr(namespace, self.dest)
      if value_list is None:
        value_list = []

      # Only the compilation can raise re.error, so keep the try body to that.
      # option_string is passed in by __call__ so the message can name the
      # offending option.
      try:
        compiled = re.compile(values, flags)
      except re.error as error:
        raise ValueError('{}: Invalid regular expression \'{}\' ({})'.format(
          option_string, error.pattern, error.msg)) from error

      value_list.append(Regex(compiled))
      setattr(namespace, self.dest, value_list)

    def __call__(self, parser, namespace, values, option_string=None):
      self.do_call(namespace, values, 0, option_string)

  class FilterAction(RegexAction):
    """Add a filter to a list of filter option values."""
    def __init__(self, option_strings, dest, nargs=None, **kwargs):
      super(FilterAction, self).__init__(option_strings, dest, nargs, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
      super(FilterAction, self).__call__(parser, namespace, values, option_string)

      value_list = getattr(namespace, self.dest)

      is_filter_out = (option_string == '--filter-out')

      # The base action appended a plain Regex; replace it with a Filter that
      # remembers which of the two options produced it.
      value_list[-1] = Filter(value_list[-1].regex, is_filter_out)

      setattr(namespace, self.dest, value_list)

  filter_group = parser.add_argument_group(
    'filtering',
    """Filters are applied to each output line according to the order given. The
    first matching filter terminates filter processing for that current line.""")

  filter_group.add_argument('--filter', action=FilterAction, dest='filters',
                            metavar='REGEX',
                            help='Only include lines matching REGEX (may be specified multiple times)')
  filter_group.add_argument('--filter-out', action=FilterAction, dest='filters',
                            metavar='REGEX',
                            help='Exclude lines matching REGEX')

  parser.add_argument('--include-generated-funcs', action='store_true',
                      help='Output checks for functions not in source')
  parser.add_argument('-v', '--verbose', action='store_true',
                      help='Show verbose output')
  parser.add_argument('-u', '--update-only', action='store_true',
                      help='Only update test if it was already autogened')
  parser.add_argument('--force-update', action='store_true',
                      help='Update test even if it was autogened by a different script')
  parser.add_argument('--enable', action='store_true', dest='enabled', default=True,
                      help='Activate CHECK line generation from this point forward')
  parser.add_argument('--disable', action='store_false', dest='enabled',
                      help='Deactivate CHECK line generation from this point forward')
  parser.add_argument('--replace-value-regex', nargs='+', default=[],
                      help='List of regular expressions to replace matching value names')
  parser.add_argument('--prefix-filecheck-ir-name', default='',
                      help='Add a prefix to FileCheck IR value names to avoid conflicts with scripted names')
  parser.add_argument('--global-value-regex', nargs='+', default=[],
                      help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)')
  parser.add_argument('--global-hex-value-regex', nargs='+', default=[],
                      help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives')
  # FIXME: in 3.9, we can use argparse.BooleanOptionalAction. At that point,
  # we need to rename the flag to just -generate-body-for-unused-prefixes.
  parser.add_argument('--no-generate-body-for-unused-prefixes',
                      action='store_false',
                      dest='gen_unused_prefix_body',
                      default=True,
                      help='Generate a function body that always matches for unused prefixes. This is useful when unused prefixes are desired, and it avoids needing to annotate each FileCheck as allowing them.')
  args = parser.parse_args()
  global _verbose, _global_value_regex, _global_hex_value_regex
  _verbose = args.verbose
  _global_value_regex = args.global_value_regex
  _global_hex_value_regex = args.global_hex_value_regex
  return args
147
148
class InputLineInfo(object):
  """Bundle of one input line, its line number and the args/argv paired with it."""

  def __init__(self, line, line_number, args, argv):
    self.line, self.line_number = line, line_number
    self.args, self.argv = args, argv
155
156
class TestInfo(object):
  """State for one test file being processed: its lines, RUN lines, arguments
  and the note strings written into the regenerated file."""

  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix, argparse_callback):
    global _prefix_filecheck_ir_name
    self.parser = parser
    self.argparse_callback = argparse_callback
    self.path = test
    self.args = args
    # Only override the module-wide prefix when one was actually requested.
    if args.prefix_filecheck_ir_name:
      _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    # Without an explicit comment prefix, .mir files use '#', the rest ';'.
    if comment_prefix is None:
      comment_prefix = '#' if self.path.endswith('.mir') else ';'
    self.comment_prefix = comment_prefix
    self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
    self.test_autogenerated_note = (self.autogenerated_note_prefix + script_name
                                    + get_autogennote_suffix(parser, self.args))
    self.test_unused_note = self.comment_prefix * 2 + ' ' + UNUSED_NOTE

  def ro_iterlines(self):
    """Yield an InputLineInfo per input line without touching any output.

    Each line is passed through check_for_command together with the current
    self.args / self.argv; the args/argv it returns are attached to the line.
    """
    for index, text in enumerate(self.input_lines):
      line_args, line_argv = check_for_command(
          text, self.parser, self.args, self.argv, self.argparse_callback)
      yield InputLineInfo(text, index, line_args, line_argv)

  def iterlines(self, output_lines):
    """Yield the lines that need processing, appending pass-through lines to
    `output_lines`.

    The autogenerated note is appended first. Lines starting with the note
    prefix are dropped; lines seen while generation is disabled are copied
    verbatim to `output_lines` rather than yielded.
    """
    output_lines.append(self.test_autogenerated_note)
    for info in self.ro_iterlines():
      text = info.line
      # Discard any previous script advertising.
      if text.startswith(self.autogenerated_note_prefix):
        continue
      self.args, self.argv = info.args, info.argv
      if self.args.enabled:
        yield info
      else:
        output_lines.append(text)

  def get_checks_for_unused_prefixes(self, run_list, used_prefixes: List[str]) -> List[str]:
    """Return the note line plus one match-everything check per unused prefix.

    `run_list` entries whose first element is None are ignored. The result is
    empty when every prefix named in `run_list` appears in `used_prefixes`.
    """
    declared = {prefix
                for entry in run_list if entry[0] is not None
                for prefix in entry[0]}
    unused_prefixes = declared.difference(set(used_prefixes))
    if not unused_prefixes:
      return []
    checks = [self.test_unused_note]
    checks.extend(
        '{comment} {prefix}: {match_everything}'.format(
            comment=self.comment_prefix,
            prefix=unused,
            match_everything='{{.*}}')
        for unused in sorted(unused_prefixes))
    return checks
217    return ret
218
def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
  """Yield a TestInfo for every file matched by `test_patterns`.

  Each pattern is expanded with glob; patterns that match nothing produce a
  warning. A file is skipped (with a warning) when its first line carries the
  autogeneration note of a different script and --force-update is off, or when
  it carries no note and --update-only is on. Lines from the unused-prefix
  note onwards are not passed to TestInfo.
  """
  for pattern in test_patterns:
    # On Windows we must expand the patterns ourselves.
    matched_tests = glob.glob(pattern)
    if not matched_tests:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for test in matched_tests:
      with open(test) as f:
        input_lines = [l.rstrip() for l in f]
      args = parser.parse_args()
      if argparse_callback is not None:
        argparse_callback(args)
      argv = sys.argv[:]
      first_line = input_lines[0] if input_lines else ""
      autogenerated = UTC_ADVERT in first_line
      if autogenerated:
        if script_name not in first_line and not args.force_update:
          warn("Skipping test which wasn't autogenerated by " + script_name, test)
          continue
        # The note line may carry extra arguments for this test.
        args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
      elif args.update_only:
        warn("Skipping test which isn't autogenerated: " + test)
        continue
      # Keep everything before the first line that contains the unused note.
      final_input_lines = list(
          itertools.takewhile(lambda l: UNUSED_NOTE not in l, input_lines))
      yield TestInfo(test, parser, script_name, final_input_lines, args, argv,
                     comment_prefix, argparse_callback)
250
251
def should_add_line_to_output(input_line, prefix_set, skip_global_checks=False, comment_marker=';'):
  """Return True if `input_line` should be copied to the output.

  Dropped lines are: a bare comment marker (unless skip_global_checks is set),
  the comment marker followed by SEPARATOR, and check lines whose prefix is in
  `prefix_set`. With skip_global_checks set, such a check line is dropped only
  if it contains '[['. Blank lines are not filtered here.
  """
  stripped = input_line.strip()
  # Skip any blank comment lines in the IR.
  if stripped == comment_marker and not skip_global_checks:
    return False
  # Skip a special double comment line we use as a separator.
  if stripped == comment_marker + SEPARATOR:
    return False
  # Anything that is not one of our CHECK lines is kept.
  m = CHECK_RE.match(input_line)
  if not (m and m.group(1) in prefix_set):
    return True
  # It is one of our CHECK lines; we are building our own, so skip it.
  if skip_global_checks:
    return '[[' not in input_line
  return False
271
272# Perform lit-like substitutions
def getSubstitutions(sourcepath):
  """Return the (placeholder, replacement) pairs derived from `sourcepath`.

  '%s' maps to the path itself, '%S' and '%p' to its directory and
  '%{pathsep}' to os.pathsep.
  """
  sourcedir = os.path.dirname(sourcepath)
  substitutions = [('%s', sourcepath)]
  substitutions += [(placeholder, sourcedir) for placeholder in ('%S', '%p')]
  substitutions.append(('%{pathsep}', os.pathsep))
  return substitutions
279
def applySubstitutions(s, substitutions):
  """Apply each (placeholder, replacement) pair to `s` in order and return the result."""
  result = s
  for placeholder, replacement in substitutions:
    result = result.replace(placeholder, replacement)
  return result
284
285# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
  """Run the tool under test on `ir` and return its standard output as text.

  Args:
    exe: the tool executable.
    cmd_args: either a list of arguments (run without a shell) or a single
      string (appended to `exe` and run through the shell). Substitutions from
      getSubstitutions(ir) are applied to it.
    ir: path of the input file; it is fed to the tool on stdin unless
      `preprocess_cmd` is given.
    preprocess_cmd: optional shell command string whose stdout is fed to the
      tool instead of the raw file.
    verbose: when true, the preprocessing command is reported on stderr.

  Returns:
    The tool's stdout with '\\r\\n' replaced by '\\n'.

  Raises:
    subprocess.CalledProcessError: propagated from subprocess.check_output when
      the tool exits with a non-zero status.
  """
  preprocessor = None
  try:
    with open(ir) as ir_file:
      substitutions = getSubstitutions(ir)
      tool_stdin = ir_file

      # TODO Remove the str form which is used by update_test_checks.py and
      # update_llc_test_checks.py
      # The safer list form is used by update_cc_test_checks.py
      if preprocess_cmd:
        # Allow pre-processing the IR file (e.g. using sed):
        assert isinstance(preprocess_cmd, str)  # TODO: use a list instead of using shell
        preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
        if verbose:
          print('Pre-processing input file: ', ir, " with command '",
                preprocess_cmd, "'", sep="", file=sys.stderr)
        # Python 2.7 doesn't have subprocess.DEVNULL:
        with open(os.devnull, 'w') as devnull:
          preprocessor = subprocess.Popen(preprocess_cmd, shell=True,
                                          stdin=devnull,
                                          stdout=subprocess.PIPE)
        tool_stdin = preprocessor.stdout

      if isinstance(cmd_args, list):
        args = [applySubstitutions(a, substitutions) for a in cmd_args]
        stdout = subprocess.check_output([exe] + args, stdin=tool_stdin)
      else:
        stdout = subprocess.check_output(exe + ' ' + applySubstitutions(cmd_args, substitutions),
                                         shell=True, stdin=tool_stdin)
  finally:
    if preprocessor is not None:
      # Close our end of the pipe first so a preprocessor that is still
      # writing is not left blocked, then reap the child so it does not stay
      # around as a zombie.
      preprocessor.stdout.close()
      preprocessor.wait()
  if sys.version_info[0] > 2:
    # FYI, if you crashed here with a decode error, your run line probably
    # results in bitcode or other binary format being written to the pipe.
    # For an opt test, you probably want to add -S or -disable-output.
    stdout = stdout.decode()
  # Fix line endings to unix LF style.
  return stdout.replace('\r\n', '\n')
319
320##### LLVM IR parser
# A RUN: directive behind a '//', ';' or '#' comment marker; group 1 is the
# text that follows 'RUN:'.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A -check-prefix / --check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A string consisting only of letters, digits, '_' and '-'.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# A check directive in a comment; group 1 is the prefix without a trailing
# -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
# Group 'cmd' is whatever follows UTC_ARGS_KEY on the line. Both pieces are raw
# strings so that '\s' reaches the regex engine instead of being treated as a
# (deprecated) invalid string escape.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '
UNUSED_NOTE = 'NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:'

# A function definition with an optional preceding '; Function Attrs:' comment.
# Named groups: attrs, func, args_and_sig, body.
OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s():,]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# "'<analysis>' for function '<func>':" followed by the body.
# Named groups: analysis, func, body.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# A quoted function name on the first line, followed by the body.
# Named groups: func, body.
LV_DEBUG_RE = re.compile(
    r'^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of an IR 'define' line; group 1 is the (optionally quoted) name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# A 'target triple = "..."' line; group 1 is the triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# Command-line options whose value is captured in group 1.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
DEBUG_ONLY_ARG_RE = re.compile(r'-debug-only[= ]([^ ]+)')

# Patterns used when scrubbing tool output.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)

# Appended to the comment marker to form the separator line that
# should_add_line_to_output() skips.
SEPARATOR = '.'
363
def error(msg, test_file=None):
  """Print `msg` on stderr prefixed with 'ERROR: ', followed by ': <test_file>' when one is given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
368
def warn(msg, test_file=None):
  """Print `msg` on stderr prefixed with 'WARNING: ', followed by ': <test_file>' when one is given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
373
def debug(*args, **kwargs):
  """print() the arguments when verbose output is on; output defaults to stderr."""
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  if not _verbose:
    return
  print(*args, **kwargs)
380
def find_run_lines(test, lines):
  """Return the RUN commands found in `lines`, with continuations joined.

  Every line matching RUN_LINE_RE contributes its command text. When the
  command collected so far ends in a backslash, the trailing backslashes are
  removed and the next RUN text is appended after a single space. `test` is
  only used in debug output.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for candidate in lines:
    found = RUN_LINE_RE.match(candidate)
    if not found:
      continue
    text = found.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + text
    else:
      run_lines.append(text)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for run_line in run_lines:
    debug('  RUN: {}'.format(run_line))
  return run_lines
395
def get_triple_from_march(march):
  """Return the triple for `march`, chosen by prefix; falls back to 'x86'.

  Each known triple is spelled exactly like the -march prefix that selects it.
  A message is printed on stderr when no prefix matches.
  """
  known_triples = ('amdgcn', 'r600', 'mips', 'sparc', 'hexagon', 've')
  for triple in known_triples:
    if march.startswith(triple):
      return triple
  print("Cannot find a triple. Assume 'x86'", file=sys.stderr)
  return 'x86'
410
def apply_filters(line, filters):
  """Return True if `line` survives `filters`.

  Filters are tried in order and the first one that matches decides: the line
  is kept for an include filter and dropped for a filter-out. When nothing
  matches, the line is kept only if no include filter was present.
  """
  saw_include_filter = False
  for candidate in filters:
    saw_include_filter = saw_include_filter or not candidate.is_filter_out
    if candidate.search(line):
      return not candidate.is_filter_out
  # If we only used filter-out, keep the line, otherwise discard it since no
  # filter matched.
  return not saw_include_filter
421
def do_filter(body, filters):
  """Return `body` reduced to the lines accepted by apply_filters.

  `body` is returned untouched when `filters` is empty or None.
  """
  if not filters:
    return body
  kept = [line for line in body.splitlines() if apply_filters(line, filters)]
  return '\n'.join(kept)
425
def scrub_body(body):
  """Normalize whitespace in `body` and return the result."""
  # Scrub runs of whitespace out of the assembly, but leave the leading
  # whitespace in place.
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation.
  expanded = collapsed.expandtabs(2)
  # Strip trailing whitespace.
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', expanded)
435
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber` on `body`, passing `extra` through the first scrubber argument.

  When `scrubber_args` is non-empty it is deep-copied, the copy's first element
  gets its extra_scrub attribute set to `extra`, and the copy is passed on; the
  caller's objects are left unmodified.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
442
443# Build up a dictionary of all the function bodies.
class function_body(object):
  """The scrubbed output recorded for one function.

  Attributes:
    scrub: the scrubbed body; this is what str() returns.
    extrascrub: the body scrubbed with the extra-scrub option on.
    args_and_sig: text recorded after the function name.
    attrs: recorded function attributes.
    func_name_separator: text that follows the function name.
  """

  def __init__(self, string, extra, args_and_sig, attrs, func_name_separator):
    self.scrub = string
    self.extrascrub = extra
    self.args_and_sig = args_and_sig
    self.attrs = attrs
    self.func_name_separator = func_name_separator

  def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs, is_backend):
    """Return True if the given output matches this one apart from argument names.

    Attributes must be equal, and the signatures must be equal once the value
    names matched by IR_VALUE_RE are dropped. For backend checks the
    extra-scrubbed bodies must then be identical; otherwise they are compared
    after dropping the argument names collected from the signatures and
    removing IR comments.
    """
    if self.attrs != attrs:
      return False

    arg_names = set()

    def drop_arg_names(match):
      # Remember the name, then rebuild the match without it.
      arg_names.add(match.group(variable_group_in_ir_value_match))
      attr = match.group(attribute_group_in_ir_value_match) or ''
      return match.group(1) + attr + match.group(match.lastindex)

    def repl_arg_names(match):
      name = match.group(variable_group_in_ir_value_match)
      head, tail = match.group(1), match.group(match.lastindex)
      if name is not None and name in arg_names:
        return head + tail
      return head + match.group(2) + tail

    own_sig = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
    if own_sig != other_sig:
      return False
    if is_backend:
      # Check without replacements, the replacements are not applied to the
      # body for backend checks.
      return self.extrascrub == extrascrub

    own_body = SCRUB_IR_COMMENT_RE.sub(
        r'', IR_VALUE_RE.sub(repl_arg_names, self.extrascrub))
    other_body = SCRUB_IR_COMMENT_RE.sub(
        r'', IR_VALUE_RE.sub(repl_arg_names, extrascrub))
    return own_body == other_body

  def __str__(self):
    return self.scrub
483
class FunctionTestBuilder:
  """Collects, per check prefix, the function bodies produced by the RUN lines.

  For each prefix, _func_dict maps a function name to a function_body, or to
  None once RUN lines sharing the prefix have been found to disagree.
  """

  def __init__(self, run_list, flags, scrubber_args, path):
    self._verbose = flags.verbose
    self._record_args = flags.function_signature
    self._check_attributes = flags.check_attributes
    # Strip double-quotes if input was read by UTC_ARGS
    self._filters = [
        Filter(re.compile(f.pattern().strip('"'), f.flags()), f.is_filter_out)
        for f in flags.filters
    ] if flags.filters else []
    self._scrubber_args = scrubber_args
    self._path = path
    # Strip double-quotes if input was read by UTC_ARGS
    self._replace_value_regex = [x.strip('"') for x in flags.replace_value_regex]
    self._func_dict = {}
    self._func_order = {}
    self._global_var_dict = {}
    self._processed_prefixes = set()
    # The first element of each run_list entry is its list of prefixes.
    for run_entry in run_list:
      for prefix in run_entry[0]:
        self._func_dict[prefix] = {}
        self._func_order[prefix] = []
        self._global_var_dict[prefix] = {}

  def finish_and_get_func_dict(self):
    """Warn about every failed prefix, then return the function dictionary."""
    for prefix in self.get_failed_prefixes():
      warn('Prefix %s had conflicting output from different RUN lines for all functions in test %s' % (prefix,self._path,))
    return self._func_dict

  def func_order(self):
    return self._func_order

  def global_var_dict(self):
    return self._global_var_dict

  def is_filtered(self):
    return bool(self._filters)

  def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes, is_backend):
    """Record the functions found in `raw_tool_output` under each of `prefixes`.

    Every match of `function_re` is filtered, scrubbed and then merged into the
    per-prefix dictionaries.
    """
    build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
    # Pattern that matches capture groups in the regex in leftmost order.
    group_regex = re.compile(r'\(.*?\)')

    def fill_capture_groups(regex, match):
      # Replace any capture groups with their matched strings.
      filled = regex
      for g in match.groups():
        filled = group_regex.sub(re.escape(g), filled, count=1)
      return filled

    for m in function_re.finditer(raw_tool_output):
      named_groups = m.groupdict()
      func = m.group('func')
      body = m.group('body')
      # func_name_separator is the string that is placed right after function name at the
      # beginning of assembly function definition. In most assemblies, that is just a
      # colon: `foo:`. But, for example, in nvptx it is a brace: `foo(`. If is_backend is
      # False, just assume that separator is an empty string.
      if not is_backend:
        func_name_separator = ''
      elif 'func_name_separator' in named_groups:
        func_name_separator = m.group('func_name_separator')
      else:
        # Use ':' as default separator.
        func_name_separator = ':'
      attrs = m.group('attrs') if self._check_attributes else ''
      # Determine if we print arguments, the opening brace, or nothing after the
      # function name
      has_signature = 'args_and_sig' in named_groups
      if self._record_args and has_signature:
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
      elif has_signature:
        args_and_sig = '('
      else:
        args_and_sig = ''
      filtered_body = do_filter(body, self._filters)
      scrubbed_body = do_scrub(filtered_body, scrubber, self._scrubber_args,
                               extra=False)
      scrubbed_extra = do_scrub(filtered_body, scrubber, self._scrubber_args,
                                extra=True)
      if 'analysis' in named_groups:
        analysis = m.group('analysis')
        if analysis.lower() != 'cost model analysis':
          warn('Unsupported analysis mode: %r!' % (analysis,))
      if func.startswith('stress'):
        # We only use the last line of the function body for stress tests.
        scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
      if self._verbose:
        print('Processing function: ' + func, file=sys.stderr)
        for l in scrubbed_body.splitlines():
          print('  ' + l, file=sys.stderr)
      for prefix in prefixes:
        # Replace function names matching the regex.
        for regex in self._replace_value_regex:
          # Replace function name with regex.
          name_match = re.match(regex, func)
          if name_match:
            func_repl = fill_capture_groups(regex, name_match)
            func = re.sub(func_repl, '{{' + func_repl + '}}', func)

          # Replace all calls to regex matching functions.
          for call_match in re.finditer(regex, scrubbed_body):
            func_repl = fill_capture_groups(regex, call_match)
            # Substitute function call names that match the regex with the same
            # capture groups set.
            scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}',
                                   scrubbed_body)

        prefix_funcs = self._func_dict[prefix]
        if func not in prefix_funcs:
          if prefix in self._processed_prefixes:
            # An earlier RUN line used this check prefixes but didn't produce
            # a body for this function. This happens in Clang tests that use
            # preprocesser directives to exclude individual functions from some
            # RUN lines.
            prefix_funcs[func] = None
          else:
            prefix_funcs[func] = function_body(
                scrubbed_body, scrubbed_extra, args_and_sig, attrs,
                func_name_separator)
            self._func_order[prefix].append(func)
          continue

        recorded = prefix_funcs[func]
        if recorded is None:
          continue
        if (str(recorded) == scrubbed_body and
            recorded.args_and_sig == args_and_sig and
            recorded.attrs == attrs):
          continue
        if recorded.is_same_except_arg_names(
            scrubbed_extra, args_and_sig, attrs, is_backend):
          recorded.scrub = scrubbed_extra
          recorded.args_and_sig = args_and_sig
        else:
          # This means a previous RUN line produced a body for this function
          # that is different from the one produced by this current RUN line,
          # so the body can't be common across RUN lines. We use None to
          # indicate that.
          prefix_funcs[func] = None

  def processed_prefixes(self, prefixes):
    """
    Mark a set of prefixes as having had at least one applicable RUN line fully
    processed. This is used to filter out function bodies that don't have
    outputs for all RUN lines.
    """
    self._processed_prefixes.update(prefixes)

  def get_failed_prefixes(self):
    """Yield each prefix that has functions recorded, all of them set to None.

    A None entry means different RUN lines produced conflicting bodies for
    that function under the prefix.
    """
    for prefix, funcs in self._func_dict.items():
      if funcs and all(recorded is None for recorded in funcs.values()):
        yield prefix
637
638
639##### Generator of LLVM IR CHECK lines
640
# An IR comment: optional whitespace, a ';' and the rest of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
642
643# TODO: We should also derive check lines for global, debug, loop declarations, etc..
644
class NamelessValue:
  """Describes one kind of IR value and how it is spelled as a FileCheck variable.

  Attributes:
    check_prefix: prefix used for the FileCheck variable name.
    check_key: key stored alongside this kind of value.
    ir_prefix: text that precedes the value in the IR.
    ir_regexp: regular expression matching the value itself.
    global_ir_rhs_regexp: regular expression for the right-hand side of a
      global definition, or None when the value is not of global scope.
    is_before_functions, is_number, replace_number_with_counter: flags stored
      as given.
    variable_mapping: original number -> counter value, filled lazily when
      replace_number_with_counter is set.
  """

  def __init__(self, check_prefix, check_key, ir_prefix, ir_regexp,
               global_ir_rhs_regexp, *, is_before_functions=False, is_number=False,
               replace_number_with_counter=False):
    self.check_prefix = check_prefix
    self.check_key = check_key
    self.ir_prefix = ir_prefix
    self.ir_regexp = ir_regexp
    self.global_ir_rhs_regexp = global_ir_rhs_regexp
    self.is_before_functions = is_before_functions
    self.is_number = is_number
    # Some variable numbers (e.g. MCINST1234) will change based on unrelated
    # modifications to LLVM, replace those with an incrementing counter.
    self.replace_number_with_counter = replace_number_with_counter
    self.variable_mapping = {}

  def is_local_def_ir_value_match(self, match):
    """Return True if this kind of IR value is "local", i.e. its IR prefix is '%'."""
    return self.ir_prefix == '%'

  def is_global_scope_ir_value_match(self, match):
    """Return True if this kind of IR value is "global", i.e. it has a global RHS regexp."""
    return self.global_ir_rhs_regexp is not None

  def get_ir_prefix_from_ir_value_match(self, match):
    """Return the (IR prefix, check prefix) pair, e.g. (%, TMP) for locals."""
    return self.ir_prefix, self.check_prefix

  def get_ir_regex_from_ir_value_re_match(self, match):
    """Return the regexp used to match the value in a check line."""
    # for backwards compatibility we check locals with '.*'
    return '.*' if self.is_local_def_ir_value_match(match) else self.ir_regexp

  def get_value_name(self, var: str, check_prefix: str):
    """Create a FileCheck variable name based on an IR name."""
    var = var.replace('!', '')
    if self.replace_number_with_counter:
      assert var.isdigit(), var
      if var not in self.variable_mapping:
        # Replace variable with an incrementing counter
        self.variable_mapping[var] = str(len(self.variable_mapping) + 1)
      var = self.variable_mapping[var]
    if var.isdigit():
      # This is a nameless value, prepend check_prefix.
      var = check_prefix + var
    elif may_clash_with_default_check_prefix_name(check_prefix, var) and _prefix_filecheck_ir_name:
      # This is a named value that clashes with the check_prefix, prepend with
      # _prefix_filecheck_ir_name, if it has been defined.
      var = _prefix_filecheck_ir_name + var
    return var.replace('.', '_').replace('-', '_').upper()

  def get_value_definition(self, var, match):
    """Return the FileCheck text that defines (captures) the variable for `var`."""
    varname = self.get_value_name(var, self.check_prefix)
    prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
    if self.is_number:
      # always capture a number in the default format
      regex, capture_start = '', '[[#'
    else:
      regex, capture_start = self.get_ir_regex_from_ir_value_re_match(match), '[['
    if self.is_local_def_ir_value_match(match):
      # For locals the IR prefix goes inside the capture.
      return capture_start + varname + ':' + prefix + regex + ']]'
    return prefix + capture_start + varname + ':' + regex + ']]'

  def get_value_use(self, var, match, var_prefix=None):
    """Return the FileCheck text that uses the variable for `var`."""
    if var_prefix is None:
      var_prefix = self.check_prefix
    capture_start = '[[#' if self.is_number else '[['
    if self.is_local_def_ir_value_match(match):
      return capture_start + self.get_value_name(var, var_prefix) + ']]'
    prefix = self.get_ir_prefix_from_ir_value_match(match)[0]
    return prefix + capture_start + self.get_value_name(var, var_prefix) + ']]'
728
# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
#
# The order of the entries matters: IR_VALUE_RE is built by iterating over this
# list and gets one capture group per entry, and get_nameless_value_from_match
# indexes back into the list using the position of the group that matched.
# Entries with a global_ir_rhs_regexp are anchored to the start of a line when
# IR_VALUE_RE is built and are also used by build_global_values_dictionary to
# collect "<lhs> = <rhs>" lines from the tool output.
ir_nameless_values = [
    #            check_prefix   check_key  ir_prefix           ir_regexp                global_ir_rhs_regexp
    NamelessValue(r'TMP'        , '%' , r'%'                   , r'[\w$.-]+?'           , None                 ) ,
    NamelessValue(r'ATTR'       , '#' , r'#'                   , r'[0-9]+'              , None                 ) ,
    NamelessValue(r'ATTR'       , '#' , r'attributes #'        , r'[0-9]+'              , r'{[^}]*}'           ) ,
    NamelessValue(r'GLOB'       , '@' , r'@'                   , r'[0-9]+'              , None                 ) ,
    NamelessValue(r'GLOB'       , '@' , r'@'                   , r'[a-zA-Z0-9_$"\\.-]+' , r'.+'                , is_before_functions=True)  ,
    NamelessValue(r'DBG'        , '!' , r'!dbg '               , r'![0-9]+'             , None                 ) ,
    NamelessValue(r'DIASSIGNID' , '!' , r'!DIAssignID '        , r'![0-9]+'             , None                 ) ,
    NamelessValue(r'PROF'       , '!' , r'!prof '              , r'![0-9]+'             , None                 ) ,
    NamelessValue(r'TBAA'       , '!' , r'!tbaa '              , r'![0-9]+'             , None                 ) ,
    NamelessValue(r'TBAA_STRUCT', '!' , r'!tbaa.struct '       , r'![0-9]+'             , None                 ) ,
    NamelessValue(r'RNG'        , '!' , r'!range '             , r'![0-9]+'             , None                 ) ,
    NamelessValue(r'LOOP'       , '!' , r'!llvm.loop '         , r'![0-9]+'             , None                 ) ,
    NamelessValue(r'META'       , '!' , r'metadata '           , r'![0-9]+'             , None                 ) ,
    NamelessValue(r'META'       , '!' , r''                    , r'![0-9]+'             , r'(?:distinct |)!.*' ) ,
    NamelessValue(r'ACC_GRP'    , '!' , r'!llvm.access.group ' , r'![0-9]+'             , None                 ) ,
]
749
# Description of the values we match in assembly output; ASM_VALUE_RE is built
# from this list, one capture group per entry, in this order. Both entries are
# numeric (is_number) and have their numbers replaced by a running counter
# (replace_number_with_counter) when the FileCheck variable name is created.
asm_nameless_values = [
    NamelessValue(r'MCINST'     , 'Inst#' , '<MCInst #'        , r'\d+'                 , r'.+', is_number=True, replace_number_with_counter=True),
    NamelessValue(r'MCREG'      , 'Reg:'  , '<MCOperand Reg:'  , r'\d+'                 , r'.+', is_number=True, replace_number_with_counter=True),
]
754
def createOrRegexp(old, new):
  """Combine two regex alternatives with '|'.

  If either side is empty (falsy) the other side is returned unchanged, so
  the function can be used to accumulate alternatives starting from ''.
  """
  if old and new:
    return old + '|' + new
  return new if not old else old
761
def createPrefixMatch(prefix_str, prefix_re):
  """Return a regex that matches `prefix_str` followed by `prefix_re`.

  Only the `prefix_re` part is a capturing group; the surrounding group is
  non-capturing, so each call contributes exactly one capture group.
  """
  return ''.join(['(?:', prefix_str, '(', prefix_re, '))'])
764
# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group each; if that changes,
# the code that relies on the group numbering (e.g. the *_group_in_ir_value_match
# constants and get_idx_from_ir_value_match) will need adjustment as well.
#
# Resulting group layout: 1 = leading whitespace, 2 = the whole alternation,
# 3.. = one group per entry of ir_nameless_values (in list order), and the last
# group is the suffix.
IR_VALUE_REGEXP_PREFIX = r'(\s*)'
IR_VALUE_REGEXP_STRING = r''
for nameless_value in ir_nameless_values:
  match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
  # Values that have a global right-hand side are only matched at the start of
  # a line.
  if nameless_value.global_ir_rhs_regexp is not None:
    match = '^' + match
  IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, match)
# A value must be followed by a comma, whitespace, a parenthesis, or the end of
# the string.
IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)'
IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX)
778
# Build the regexp that matches an "ASM value" (currently only for --asm-show-inst comments).
# The group layout mirrors IR_VALUE_RE: 1 = the comment leader ('#' or '//')
# plus following whitespace, 2 = the whole alternation, 3.. = one group per
# entry of asm_nameless_values, and the last group is the suffix.
ASM_VALUE_REGEXP_STRING = ''
for nameless_value in asm_nameless_values:
  match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
  ASM_VALUE_REGEXP_STRING = createOrRegexp(ASM_VALUE_REGEXP_STRING, match)
# An ASM value must be followed by '>', whitespace, or the end of the string.
ASM_VALUE_REGEXP_SUFFIX = r'([>\s]|\Z)'
ASM_VALUE_RE = re.compile(r'((?:#|//)\s*)' + '(' + ASM_VALUE_REGEXP_STRING + ')' + ASM_VALUE_REGEXP_SUFFIX)
786
# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# Constants for the group id of special matches. Groups 3 and 4 belong to the
# first two entries of ir_nameless_values (the '%' values and the '#'
# attributes), so these must be updated if that list is reordered.
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
794
795# Check a match for IR_VALUE_RE and inspect it to determine if it was a local
796# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
def get_idx_from_ir_value_match(match):
  """Return the index of the nameless-value entry that produced `match`.

  The index is the position of the first non-empty nameless-value group,
  counted from the first nameless group; the last group of the match (the
  suffix) is not inspected. If no group matched, an error is reported and 0
  is returned.
  """
  first = first_nameless_group_in_ir_value_match
  matched_group = next(
      (group for group in range(first, match.lastindex)
       if match.group(group) is not None),
      None)
  if matched_group is None:
    error("Unable to identify the kind of IR value from the match!")
    return 0
  return matched_group - first
803
804# See get_idx_from_ir_value_match
def get_name_from_ir_value_match(match):
  """Return the text captured by the nameless-value group that matched."""
  value_idx = get_idx_from_ir_value_match(match)
  return match.group(first_nameless_group_in_ir_value_match + value_idx)
807
def get_nameless_value_from_match(match, nameless_values) -> NamelessValue:
  """Return the entry of `nameless_values` whose group produced `match`.

  `nameless_values` must be the list the matching regex was built from, since
  the lookup is purely positional.
  """
  value_idx = get_idx_from_ir_value_match(match)
  return nameless_values[value_idx]
810
811# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
  """Return a truthy value if `var` looks like `check_prefix` plus digits.

  The comparison ignores case. An empty/None `check_prefix` is returned as-is
  (falsy); otherwise the result is the regex match object or None.
  """
  if not check_prefix:
    return check_prefix
  clash_pattern = r'^' + check_prefix + r'[0-9]+?$'
  return re.match(clash_pattern, var, re.IGNORECASE)
814
def generalize_check_lines_common(lines, is_analyze, vars_seen,
                                  global_vars_seen, nameless_values,
                                  nameless_value_regex, is_asm):
  """Replace value names in `lines` with FileCheck variable defs and uses.

  Args:
    lines: list of output lines; it is modified in place and also returned.
    is_analyze: for non-asm input, when true the lines are only scrubbed and
      no FileCheck variables are introduced.
    vars_seen: set of (name, check_key) for local values already defined; new
      local definitions are added to it.
    global_vars_seen: dict mapping (name, check_key) of non-local values to the
      check prefix they were first defined with; new definitions are added.
    nameless_values: list of value descriptions `nameless_value_regex` was
      built from (ir_nameless_values or asm_nameless_values).
    nameless_value_regex: compiled regex matching the values to replace.
    is_asm: true for assembly lines; IR-specific scrubbing is skipped.

  Returns:
    The same `lines` list.
  """
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = get_name_from_ir_value_match(match)
    nameless_value = get_nameless_value_from_match(match, nameless_values)
    if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
      warn("Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
           " with scripted FileCheck name." % (var,))
    key = (var, nameless_value.check_key)
    is_local_def = nameless_value.is_local_def_ir_value_match(match)
    if is_local_def and key in vars_seen:
      rv = nameless_value.get_value_use(var, match)
    elif not is_local_def and key in global_vars_seen:
      # We could have seen a different prefix for the global variables first,
      # ensure we use that one instead of the prefix for the current match.
      rv = nameless_value.get_value_use(var, match, global_vars_seen[key])
    else:
      if is_local_def:
        vars_seen.add(key)
      else:
        global_vars_seen[key] = nameless_value.check_prefix
      rv = nameless_value.get_value_definition(var, match)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(match.lastindex)

  for i, line in enumerate(lines):
    if not is_asm:
      # An IR variable named '%.' matches the FileCheck regex string.
      line = line.replace('%.', '%dot')
      # For globals selected by _global_hex_value_regex, rewrite integer
      # constants ("iN <decimal>") as FileCheck numeric expressions in hex.
      for regex in _global_hex_value_regex:
        if re.match('^@' + regex + ' = ', line):
          line = re.sub(r'\bi([0-9]+) ([0-9]+)',
              lambda m : 'i' + m.group(1) + ' [[#' + hex(int(m.group(2))) + ']]',
              line)
          break
      # Ignore any comments, since the check lines will too.
      scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
      lines[i] = scrubbed_line
    if is_asm or not is_analyze:
      # It can happen that two matches are back-to-back and for some reason sub
      # will not replace both of them. For now we work around this by
      # substituting until there is no more match.
      changed = True
      while changed:
        (lines[i], changed) = nameless_value_regex.subn(transform_line_vars,
                                                        lines[i], count=1)
  return lines
871
872# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
  """Generalize IR `lines` using the IR value table and IR_VALUE_RE."""
  return generalize_check_lines_common(
      lines, is_analyze, vars_seen, global_vars_seen,
      nameless_values=ir_nameless_values,
      nameless_value_regex=IR_VALUE_RE,
      is_asm=False)
877
def generalize_asm_check_lines(lines, vars_seen, global_vars_seen):
  """Generalize assembly `lines` using the ASM value table and ASM_VALUE_RE."""
  return generalize_check_lines_common(
      lines, False, vars_seen, global_vars_seen,
      nameless_values=asm_nameless_values,
      nameless_value_regex=ASM_VALUE_RE,
      is_asm=True)
882
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_backend, is_analyze, global_vars_seen_dict, is_filtered):
  """Append the FileCheck lines for function `func_name` to `output_lines`.

  For every run line in `prefix_list`, the first check prefix that has output
  for the function (and was not printed or excluded) is used to emit the label
  line, optional attribute/signature lines and the function body.

  Args:
    output_lines: list of str the generated lines are appended to.
    comment_marker: comment leader placed in front of every emitted line.
    prefix_list: run-line descriptions; only element 0 of each item (the list
      of check prefixes of that run line) is read here.
    func_dict: maps check prefix -> function name -> function description, an
      object providing `attrs`, `args_and_sig` and `func_name_separator` whose
      str() is the function body. A falsy description means "no output".
    func_name: name of the function to emit checks for.
    check_label_format: %-format string for the label line; it is given
      (check prefix, function name, args_and_sig, func_name_separator).
    is_backend: emit the body as assembly check lines instead of IR.
    is_analyze: forwarded to generalize_check_lines for signature and IR body.
    global_vars_seen_dict: maps check prefix -> dict of global FileCheck
      variables already defined; used to seed the per-run-line state and
      updated with the variables defined while emitting.
    is_filtered: when true, body lines are emitted without the '-NEXT' suffix.

  Returns:
    The list of check prefixes for which lines were emitted.
  """
  # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
  prefix_exclusions = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
    # exist for this run line. A subset of the check prefixes might know about the function but only because
    # other run lines created it.
    if any(func_name not in func_dict[checkprefix] for checkprefix in checkprefixes):
      prefix_exclusions |= set(checkprefixes)

  # prefix_exclusions is constructed, we can now emit the output
  for p in prefix_list:
    global_vars_seen = {}
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      # Seed the globals seen so far; this also guarantees that
      # global_vars_seen_dict has an entry for checkprefix from here on.
      if checkprefix in global_vars_seen_dict:
        global_vars_seen.update(global_vars_seen_dict[checkprefix])
      else:
        global_vars_seen_dict[checkprefix] = {}
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is excluded.
      if checkprefix in prefix_exclusions:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_backend and printed_prefixes:
        output_lines.append(comment_marker)

      # Snapshot of the globals known before this function is emitted, so that
      # only newly defined ones are recorded afterwards.
      global_vars_seen_before = set(global_vars_seen)

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      attrs = str(func_dict[checkprefix][func_name].attrs)
      attrs = '' if attrs == 'None' else attrs
      if attrs:
        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      if args_and_sig:
        args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0]
      func_name_separator = func_dict[checkprefix][func_name].func_name_separator
      if '[[' in args_and_sig:
        # The signature contains FileCheck variables, so it goes on its own
        # -SAME line rather than into the -LABEL line.
        output_lines.append(check_label_format % (checkprefix, func_name, '', func_name_separator))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig, func_name_separator))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()
      if not func_body:
        # We have filtered everything.
        continue

      # For ASM output, just emit the check lines.
      if is_backend:
        body_start = 1
        if is_filtered:
          # For filtered output we don't add "-NEXT" so don't add extra spaces
          # before the first line.
          body_start = 0
        else:
          output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        func_lines = generalize_asm_check_lines(func_body[body_start:],
                                                vars_seen, global_vars_seen)
        for func_line in func_lines:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            check_suffix = '-NEXT' if not is_filtered else ''
            output_lines.append('%s %s%s:  %s' % (comment_marker, checkprefix,
                                                  check_suffix, func_line))
        # Remember new global variables we have not seen before
        for key in global_vars_seen:
          if key not in global_vars_seen_before:
            global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen)

      # Every line of the body is checked, including the first one, even though
      # it is often just noise (a useless asm comment or entry label): better
      # to check everything. Be safe rather than sorry.
      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them: the line following a
        # blank line gets a plain check instead of a -NEXT check.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          check_suffix = '-NEXT' if not is_filtered else ''
          output_lines.append('{} {}{}:  {}'.format(
              comment_marker, checkprefix, check_suffix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)

      # Remember new global variables we have not seen before
      for key in global_vars_seen:
        if key not in global_vars_seen_before:
          global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
      break
  return printed_prefixes
1015
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig,
                  global_vars_seen_dict, is_filtered):
  """Emit IR check lines for `func_name` via add_checks.

  When `function_sig` is set, the label line also matches the text between
  'define' and the '@' of the function name. `preserve_names` is passed to
  add_checks as its `is_analyze` argument.
  """
  # Label format is based on IR string.
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  else:
    function_def_regex = ''
  check_label_format = '{} %s-LABEL: {}@%s%s%s'.format(comment_marker, function_def_regex)
  return add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
                    check_label_format,
                    is_backend=False,
                    is_analyze=preserve_names,
                    global_vars_seen_dict=global_vars_seen_dict,
                    is_filtered=is_filtered)
1025
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered):
  """Emit analysis check lines for `func_name` via add_checks.

  The label line quotes the function name, values are not generalized
  (is_analyze is true) and a fresh, empty global-variable dictionary is used
  for every call.
  """
  check_label_format = '{} %s-LABEL: \'%s%s%s\''.format(comment_marker)
  return add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
                    check_label_format,
                    is_backend=False,
                    is_analyze=True,
                    global_vars_seen_dict={},
                    is_filtered=is_filtered)
1032
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
  """Record the global value definitions found in `raw_tool_output`.

  For every value kind that has a global right-hand-side regex, all lines of
  the form "<lhs> = <rhs>" are collected and stored in
  glob_val_dict[prefix][check_prefix] for each prefix in `prefixes`. If a
  prefix already holds different lines for that kind, the entry is reset to
  None, except for the last prefix, where a warning is issued and the new
  lines are stored.
  """
  for nameless_value in itertools.chain(ir_nameless_values, asm_nameless_values):
    rhs_re_str = nameless_value.global_ir_rhs_regexp
    if rhs_re_str is None:
      continue

    lhs_re_str = nameless_value.ir_prefix + nameless_value.ir_regexp
    definition_re = re.compile(r'^' + lhs_re_str + r'\s=\s' + rhs_re_str + r'$',
                               flags=(re.M))
    lines = [m.group(0) for m in definition_re.finditer(raw_tool_output)]
    kind = nameless_value.check_prefix

    for prefix in prefixes:
      values_for_prefix = glob_val_dict[prefix]
      if values_for_prefix is None:
        continue
      if kind in values_for_prefix:
        if lines == values_for_prefix[kind]:
          continue
        if prefix != prefixes[-1]:
          # Conflicting output under a shared prefix: drop the entry.
          values_for_prefix[kind] = None
          continue
        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
      values_for_prefix[kind] = lines
1059
def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines, global_vars_seen_dict, is_analyze, is_before_functions):
  """Append check lines for global value definitions to `output_lines`.

  Only value kinds that have a global right-hand-side regex and whose
  `is_before_functions` flag equals the `is_before_functions` argument are
  handled. For each such kind and each run line in `prefix_list`, the first
  check prefix with recorded lines in `glob_val_dict` is used.

  Args:
    glob_val_dict: maps check prefix -> value kind (its check_prefix) -> list
      of definition lines, as filled in by build_global_values_dictionary.
    comment_marker: comment leader placed in front of every emitted line.
    prefix_list: run-line descriptions; only element 0 of each item (the list
      of check prefixes, possibly None) is read here.
    output_lines: list of str the generated lines are appended to.
    global_vars_seen_dict: maps check prefix -> dict of global FileCheck
      variables already defined; updated with newly defined ones.
    is_analyze: forwarded to generalize_check_lines.
    is_before_functions: selects which value kinds to emit (see above).

  Returns:
    The set of (check prefix, value kind) pairs for which lines were emitted.
  """
  printed_prefixes = set()
  for nameless_value in ir_nameless_values:
    if nameless_value.global_ir_rhs_regexp is None:
      continue
    if nameless_value.is_before_functions != is_before_functions:
      continue
    for p in prefix_list:
      global_vars_seen = {}
      checkprefixes = p[0]
      if checkprefixes is None:
        continue
      for checkprefix in checkprefixes:
        # Seed the globals seen so far for this prefix.
        if checkprefix in global_vars_seen_dict:
          global_vars_seen.update(global_vars_seen_dict[checkprefix])
        else:
          global_vars_seen_dict[checkprefix] = {}
        if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
          break
        # Skip prefixes that have nothing recorded for this kind of value.
        if not glob_val_dict[checkprefix]:
          continue
        if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
          continue
        if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
          continue

        check_lines = []
        global_vars_seen_before = [key for key in global_vars_seen.keys()]
        for line in glob_val_dict[checkprefix][nameless_value.check_prefix]:
          # When _global_value_regex is non-empty, only lines defining a global
          # whose name matches one of its patterns are kept.
          if _global_value_regex:
            matched = False
            for regex in _global_value_regex:
              if re.match('^@' + regex + ' = ', line):
                matched = True
                break
            if not matched:
              continue
          tmp = generalize_check_lines([line], is_analyze, set(), global_vars_seen)
          check_line = '%s %s: %s' % (comment_marker, checkprefix, tmp[0])
          check_lines.append(check_line)
        if not check_lines:
          continue

        output_lines.append(comment_marker + SEPARATOR)
        for check_line in check_lines:
          output_lines.append(check_line)

        printed_prefixes.add((checkprefix, nameless_value.check_prefix))

        # Remember new global variables we have not seen before
        for key in global_vars_seen:
          if key not in global_vars_seen_before:
            global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
        break

  # Close the block of global checks with a final separator line.
  if printed_prefixes:
    output_lines.append(comment_marker + SEPARATOR)
  return printed_prefixes
1118
1119
def check_prefix(prefix):
  """Warn if `prefix` is not accepted by PREFIX_RE.

  If the prefix contains a comma, the warning also suggests using
  --check-prefixes instead.
  """
  if PREFIX_RE.match(prefix):
    return
  # Format the fixed part first and append the hint afterwards: the hint
  # contains the user-supplied prefix verbatim and must not become part of the
  # %-format string (a '%' in the prefix would otherwise raise).
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
1127
1128
def verify_filecheck_prefixes(fc_cmd):
  """Validate the prefixes given in the FileCheck command line `fc_cmd`.

  Every whitespace-separated argument containing 'check-prefix=' or
  'check-prefixes=' has its prefix(es) passed to check_prefix; prefixes that
  occur more than once in a 'check-prefixes=' list are warned about.
  """
  for token in fc_cmd.split():
    if "check-prefix=" in token:
      check_prefix(token.split('=', 1)[1])
      continue
    if "check-prefixes=" not in token:
      continue
    names = token.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
1141
1142
def get_autogennote_suffix(parser, args):
  """Return the UTC_ARGS suffix describing the non-default options in `args`.

  Options that are paths/tool selections or otherwise irrelevant for
  regenerating the checks are ignored, as are options still at their default
  value. Returns '' when nothing needs to be recorded, otherwise
  ' <UTC_ARGS_KEY> <options>'.
  """
  # Parameters such as paths to the binary or the list of tests are ignored.
  ignored_dests = ('tests', 'update_only', 'tool_binary', 'opt_binary',
                   'llc_binary', 'clang', 'opt', 'llvm_bin', 'verbose',
                   'force_update')
  note = ''
  for action in parser._actions:
    dest = action.dest
    # Options such as --help are not included in args at all.
    if not hasattr(args, dest) or dest in ignored_dests:
      continue
    value = getattr(args, dest)
    # An action storing a constant (usually True/False) is only relevant when
    # it is the one that produced the value (boolean --foo/--no-foo options).
    if action.const is not None and value != action.const:
      continue
    if parser.get_default(dest) == value:
      continue  # Don't add default values
    if dest != 'filters':
      note += action.option_strings[0] + ' '
      if action.const is None:  # action takes a parameter
        if action.nargs == '+':
          value = ' '.join('"' + v.strip('"') + '"' for v in value)
        note += '%s ' % value
      continue
    # The value is a list of Filter objects; emit a separate option for each
    # filter element, skipping ones that are already present.
    for elem in value:
      if elem.is_filter_out:
        opt_name = 'filter-out'
      else:
        opt_name = 'filter'
      new_arg = '--%s "%s" ' % (opt_name, elem.pattern().strip('"'))
      if new_arg not in note:
        note += new_arg
  if not note:
    return note
  # Drop the trailing space left by the last option.
  return ' %s %s' % (UTC_ARGS_KEY, note[:-1])
1179
1180
def check_for_command(line, parser, args, argv, argparse_callback):
  """Apply a UTC_ARGS command found in `line`, if there is one.

  The options of the command are appended to `argv` (in place) and the
  arguments are re-parsed, leaving out anything listed in `args.tests`.
  `argparse_callback`, if not None, is called with the new arguments.

  Returns:
    The (possibly re-parsed) args and the argv list.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if not cmd_m:
    return args, argv
  argv.extend(option
              for option in shlex.split(cmd_m.group('cmd').strip())
              if option)
  remaining = [arg for arg in argv if arg not in args.tests]
  args = parser.parse_args(remaining)
  if argparse_callback is not None:
    argparse_callback(args)
  return args, argv
1191
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
  """Look up an option value for a test, optionally searching all its lines.

  Args:
    test_info: object providing `args` and `ro_iterlines()`; the latter yields
      items with `line` and `args` attributes.
    get_arg_to_check: callable extracting the option value from an args object.
    arg_string: option name, used only in the warning text.
    is_global: if true and the option is not set in `test_info.args`, the
      per-line args are searched as well and the first truthy value is used.

  Returns:
    The value found, or the last (falsy) result of `get_arg_to_check`.
  """
  result = get_arg_to_check(test_info.args)
  if result or not is_global:
    return result
  # See if this has been specified via UTC_ARGS.  This is a "global" option
  # that affects the entire generation of test checks.  If it exists anywhere
  # in the test, apply it to everything.
  saw_line = False
  for line_info in test_info.ro_iterlines():
    line = line_info.line
    # Anything that is neither a ';' comment nor blank counts as test input.
    if not line.startswith(';') and line.strip() != '':
      saw_line = True
    result = get_arg_to_check(line_info.args)
    if not result:
      continue
    if saw_line:
      # We saw the option after already reading some test input lines.
      # Warn about it.
      print('WARNING: Found {} in line following test start: '.format(arg_string)
            + line, file=sys.stderr)
      print('WARNING: Consider moving {} to top of file'.format(arg_string),
            file=sys.stderr)
    break
  return result
1214
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
  """Copy the input lines of a test to `output_lines`, dropping old checks.

  Lines consisting only of the comment string (optionally followed by
  SEPARATOR) are dropped, as are comment lines that CHECK_RE recognises as
  check lines for a prefix in `prefix_set`. All other lines are appended
  without their trailing newline.
  """
  for input_line_info in test_info.iterlines(output_lines):
    line = input_line_info.line
    stripped = line.strip()
    if stripped == comment_string or stripped == comment_string + SEPARATOR:
      continue
    if line.lstrip().startswith(comment_string):
      m = CHECK_RE.match(line)
      if m and m.group(1) in prefix_set:
        continue
    output_lines.append(line.rstrip('\n'))
1228
def add_checks_at_end(output_lines, prefix_list, func_order,
                      comment_string, check_generator):
  """Emit checks for all functions, grouped by check prefix.

  Args:
    output_lines: list of str the generated lines are appended to.
    prefix_list: run-line descriptions; element 0 of each item is the list of
      check prefixes and element 1 the tool arguments.
    func_order: maps check prefix -> list of function names, in output order.
    comment_string: line appended between consecutive groups of checks.
    check_generator: callable(output_lines, run_list, func) that emits the
      checks and returns the prefixes it generated them for.

  Returns:
    The set of all prefixes reported by `check_generator`.
  """
  emitted = set()
  generated_prefixes = set()
  for run_entry in prefix_list:
    run_prefixes, tool_args = run_entry[0], run_entry[1]
    for single_prefix in run_prefixes:
      for func in func_order[single_prefix]:
        # The func order can contain the same functions multiple times;
        # each (function, prefix) pair is only emitted once.
        if (func, single_prefix) in emitted:
          continue
        if emitted:
          output_lines.append(comment_string)

        # The add_*_checks routines expect a run list whose items are
        # tuples of (list of prefixes, tool command args) and emit checks
        # for each prefix in that list, implicitly assuming that every run
        # line generates something for every function.  That does not hold
        # for generated functions (e.g. -fopenmp vs. no -fopenmp), so only
        # the prefix of interest is passed.  As a result the checks are
        # ordered by prefix instead of by function as in "normal" mode.
        single_run_list = [([single_prefix], tool_args)]
        for generated_prefix in check_generator(output_lines,
                                                single_run_list, func):
          emitted.add((func, generated_prefix))
          generated_prefixes.add(generated_prefix)
  return generated_prefixes
1264