xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision 8bc2c662d9c0f241fb8538979f8db1af7f2e353e)
1from __future__ import print_function
2
3import copy
4import glob
5import re
6import subprocess
7import sys
8
# Python 3 dropped string.expandtabs(), so provide a minimal stand-in that
# exposes the same call shape, string.expandtabs(text, tabsize), via
# str.expandtabs. On Python 2 the real module is imported instead.
if sys.version_info[0] > 2:
  class string:
    expandtabs = str.expandtabs
else:
  import string
14
15##### Common utilities for update_*test_checks.py
16
17
# Module-wide verbosity switch; set by parse_commandline_args() and read by
# debug().
_verbose = False
# Prefix prepended to FileCheck names derived from named IR values that could
# clash with scripted names; set by TestInfo from --prefix-filecheck-ir-name.
_prefix_filecheck_ir_name = ''
20
def parse_commandline_args(parser):
  """Add the options common to the update scripts to `parser` and parse.

  The options are registered in a fixed order, then parser.parse_args() is
  called. As a side effect the module-level _verbose flag is set from the
  parsed --verbose option. Returns the parsed namespace.
  """
  global _verbose
  common_options = (
      (('--include-generated-funcs',),
       dict(action='store_true',
            help='Output checks for functions not in source')),
      (('-v', '--verbose'),
       dict(action='store_true',
            help='Show verbose output')),
      (('-u', '--update-only'),
       dict(action='store_true',
            help='Only update test if it was already autogened')),
      (('--force-update',),
       dict(action='store_true',
            help='Update test even if it was autogened by a different script')),
      (('--enable',),
       dict(action='store_true', dest='enabled', default=True,
            help='Activate CHECK line generation from this point forward')),
      (('--disable',),
       dict(action='store_false', dest='enabled',
            help='Deactivate CHECK line generation from this point forward')),
      (('--replace-function-regex',),
       dict(nargs='+', default=[],
            help='List of regular expressions to replace matching function names')),
      (('--prefix-filecheck-ir-name',),
       dict(default='',
            help='Add a prefix to FileCheck IR value names to avoid conflicts with scripted names')),
  )
  for option_names, settings in common_options:
    parser.add_argument(*option_names, **settings)
  parsed = parser.parse_args()
  _verbose = parsed.verbose
  return parsed
42
43
class InputLineInfo(object):
  """Plain record for one input line and the options in effect for it.

  Attributes:
    line: the text of the line.
    line_number: the index of the line as supplied by the creator.
    args: the parsed arguments associated with this line.
    argv: the argument vector associated with this line.
  """

  def __init__(self, line, line_number, args, argv):
    self.line, self.line_number = line, line_number
    self.args, self.argv = args, argv
50
51
class TestInfo(object):
  """State for one test file that is being (re)generated.

  Holds the path, the input lines, the RUN lines found in them, the current
  args/argv, and the autogeneration note that is written at the top of the
  output.
  """

  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix, argparse_callback):
    global _prefix_filecheck_ir_name
    self.parser = parser
    self.argparse_callback = argparse_callback
    self.path = test
    self.args = args
    # Only override the module-level prefix when one was actually given.
    if args.prefix_filecheck_ir_name:
      _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    # Without an explicit comment prefix, .mir files use '#', the rest ';'.
    if comment_prefix is None:
      comment_prefix = '#' if self.path.endswith('.mir') else ';'
    self.comment_prefix = comment_prefix
    self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
    note = self.autogenerated_note_prefix + script_name
    note += get_autogennote_suffix(parser, self.args)
    self.test_autogenerated_note = note

  def ro_iterlines(self):
    """Yield an InputLineInfo per input line without touching any output.

    Each line is passed to check_for_command together with the current
    self.args / self.argv; its result is stored in the yielded record.
    """
    for index, text in enumerate(self.input_lines):
      line_args, line_argv = check_for_command(
          text, self.parser, self.args, self.argv, self.argparse_callback)
      yield InputLineInfo(text, index, line_args, line_argv)

  def iterlines(self, output_lines):
    """Yield the lines for which check generation is enabled.

    The autogeneration note is appended to output_lines first. Lines that
    start with the note prefix are dropped. self.args / self.argv are updated
    from every remaining line; lines seen while generation is disabled are
    copied to output_lines unchanged instead of being yielded.
    """
    output_lines.append(self.test_autogenerated_note)
    for info in self.ro_iterlines():
      text = info.line
      # Discard any previous script advertising.
      if text.startswith(self.autogenerated_note_prefix):
        continue
      self.args, self.argv = info.args, info.argv
      if self.args.enabled:
        yield info
      else:
        output_lines.append(text)
94
def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
  """Yield a TestInfo for every test file matched by the given glob patterns.

  Patterns that match nothing produce a warning and are skipped. A file whose
  first line carries the autogeneration note of a different script is skipped
  unless --force-update is set; a file without the note is skipped when
  --update-only is set.
  """
  for pattern in test_patterns:
    # On Windows we must expand the patterns ourselves.
    matched_paths = glob.glob(pattern)
    if not matched_paths:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for test in matched_paths:
      with open(test) as handle:
        input_lines = [raw.rstrip() for raw in handle]
      args = parser.parse_args()
      if argparse_callback is not None:
        argparse_callback(args)
      argv = sys.argv[:]
      first_line = input_lines[0] if input_lines else ""
      if UTC_ADVERT in first_line:
        generated_elsewhere = script_name not in first_line
        if generated_elsewhere and not args.force_update:
          warn("Skipping test which wasn't autogenerated by " + script_name, test)
          continue
        # The note line may carry options of its own; apply them.
        args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
      elif args.update_only:
        warn("Skipping test which isn't autogenerated: " + test)
        continue
      yield TestInfo(test, parser, script_name, input_lines, args, argv,
                     comment_prefix, argparse_callback)
121
122
def should_add_line_to_output(input_line, prefix_set, skip_global_checks = False):
  """Return True if input_line should be copied to the regenerated test.

  Args:
    input_line: a line of the existing test file.
    prefix_set: the check prefixes whose CHECK lines are being regenerated.
    skip_global_checks: when True, blank ';' comment lines are kept, and a
      CHECK line for one of the prefixes is dropped only if it contains '[['.

  Returns:
    False for lines that must be dropped, True otherwise. Blank lines are
    always kept.
  """
  stripped = input_line.strip()
  # Skip any blank comment lines in the IR.
  if not skip_global_checks and stripped == ';':
    return False
  # Skip a special double comment line we use as a separator.
  if stripped == SEPARATOR:
    return False
  # And skip any CHECK lines. We're building our own.
  m = CHECK_RE.match(input_line)
  if m and m.group(1) in prefix_set:
    if skip_global_checks:
      # A plain substring test replaces the former per-call regex, which was
      # written as a non-raw string containing invalid escape sequences.
      return '[[' not in input_line
    return False

  return True
142
# Run the tool under test with the IR file on its stdin and return its stdout.
def invoke_tool(exe, cmd_args, ir):
  """Execute `exe` with `cmd_args`, feeding the file `ir` on standard input.

  cmd_args is either a list of arguments (run without a shell) or a single
  string (appended to exe and run through the shell). Returns the captured
  standard output with '\\r\\n' line endings converted to '\\n'.
  """
  with open(ir) as ir_file:
    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    if isinstance(cmd_args, list):
      command, use_shell = [exe] + cmd_args, False
    else:
      command, use_shell = exe + ' ' + cmd_args, True
    output = subprocess.check_output(command, shell=use_shell, stdin=ir_file)
  if sys.version_info[0] > 2:
    output = output.decode()
  # Normalize line endings to unix LF style.
  return output.replace('\r\n', '\n')
158
159##### LLVM IR parser
# A RUN: directive after a '//', ';' or '#' comment marker; group 1 is the
# text following 'RUN:'.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A -check-prefix / --check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A string made only of letters, digits, '_' and '-'.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# A commented check line; group 1 is the text before the optional
# -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix and the colon.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')
164
# Marker that introduces script arguments embedded in a test file.
UTC_ARGS_KEY = 'UTC_ARGS:'
# Matches a line containing UTC_ARGS_KEY; the named group 'cmd' holds the text
# after the marker. Every fragment is a raw string so that '\s' is a regex
# escape rather than an invalid string escape.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
# Text that starts the autogeneration note; the script name is appended to it.
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '
168
# A function definition with an optional preceding 'Function Attrs:' comment.
# Named groups: 'attrs', 'func', 'args_and_sig' (from the opening parenthesis
# through the opening brace) and 'body' (up to a line holding only '}').
OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# A "'<analysis>' for function '<func>':" header followed by its output.
# Named groups: 'analysis', 'func' and 'body'.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of a function definition; group 1 is the (optionally quoted) name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# A 'target triple = "..."' line; group 1 is the triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march options; group 1 is the option value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
183
# Whitespace at the very start of the string.
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs, excluding positions rejected by the leading lookahead
# (start of a line, or a two-space indent followed by a word character).
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of a line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias of the pattern above; this is the name scrub_body() uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Trailing spaces/tabs and '#<number>' tokens at the end of a line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A '# kill:' comment line at the start of the string (no re.M here).
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# Loop annotation comments.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
# A lone '#' (with optional preceding spaces/tabs) at the end of a line that
# follows a non-whitespace character.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)

# Separator line; should_add_line_to_output() drops lines equal to it.
SEPARATOR = ';.'
195
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr, suffixed with ': <test_file>' if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
200
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr, suffixed with ': <test_file>' if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
205
def debug(*args, **kwargs):
  """Forward to print() when verbose output is on; stderr is the default file."""
  if not _verbose:
    return
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  print(*args, **kwargs)
212
def find_run_lines(test, lines):
  """Collect the RUN lines in `lines`, joining backslash continuations.

  Returns the text after 'RUN:' of each matching line. When a collected
  entry ends with a backslash, the next RUN line is appended to it (trailing
  backslashes removed, separated by a space) instead of starting a new entry.
  `test` is only used in the debug output.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for line in lines:
    found = RUN_LINE_RE.match(line)
    if not found:
      continue
    text = found.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + text
    else:
      run_lines.append(text)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for entry in run_lines:
    debug('  RUN: {}'.format(entry))
  return run_lines
227
def scrub_body(body):
  """Normalize whitespace in `body` and return the result.

  Three steps, in order: whitespace runs matched by SCRUB_WHITESPACE_RE are
  replaced by a single space (leading whitespace is left in place), tabs are
  expanded with a tab size of 2, and trailing whitespace is removed.
  """
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  untabbed = string.expandtabs(collapsed, 2)
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', untabbed)
237
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call scrubber(body, *scrubber_args) with extra_scrub set to `extra`.

  When scrubber_args is non-empty it is deep-copied first and extra_scrub is
  set on the first element of the copy, so the caller's objects are left
  untouched. With empty scrubber_args the scrubber gets no extra arguments.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
244
# Value type stored in the dictionary of all the function bodies.
class function_body(object):
  """The scrubbed output recorded for one function.

  Attributes:
    scrub: the scrubbed body; this is what str() returns.
    extrascrub: the body scrubbed with the extra scrub option.
    args_and_sig: the recorded argument/signature text.
    attrs: the recorded function attributes.
  """

  def __init__(self, string, extra, args_and_sig, attrs):
    self.scrub, self.extrascrub = string, extra
    self.args_and_sig, self.attrs = args_and_sig, attrs

  def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs):
    """Return True if the given output differs from this one only in names
    that appear as IR values in the two signatures."""
    if self.attrs != attrs:
      return False

    # Names collected from both signatures while they are being stripped.
    signature_names = set()

    def drop_arg_names(found):
        signature_names.add(found.group(variable_group_in_ir_value_match))
        attr = found.group(attribute_group_in_ir_value_match) or ''
        return found.group(1) + attr + found.group(found.lastindex)

    def repl_arg_names(found):
        name = found.group(variable_group_in_ir_value_match)
        head, tail = found.group(1), found.group(found.lastindex)
        if name is not None and name in signature_names:
            return head + tail
        return head + found.group(2) + tail

    # Both signatures are processed before comparing so that the names of
    # both end up in signature_names.
    own_sig = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
    if own_sig != other_sig:
        return False

    # Compare the bodies with those names removed and IR comments stripped.
    own_body = SCRUB_IR_COMMENT_RE.sub(
        r'', IR_VALUE_RE.sub(repl_arg_names, self.extrascrub))
    other_body = SCRUB_IR_COMMENT_RE.sub(
        r'', IR_VALUE_RE.sub(repl_arg_names, extrascrub))
    return own_body == other_body

  def __str__(self):
    return self.scrub
279
class FunctionTestBuilder:
  """Collects the scrubbed output of every function for every check prefix.

  For each prefix found in run_list the builder keeps:
    _func_dict[prefix]: function name -> function_body, or None when
      different RUN lines produced conflicting output for that function.
    _func_order[prefix]: the function names in the order they were recorded.
    _global_var_dict[prefix]: a dictionary that is passed to
      build_global_values_dictionary().
  """

  def __init__(self, run_list, flags, scrubber_args):
    """Set up empty per-prefix tables.

    Args:
      run_list: iterable of entries whose first element is the list of check
        prefixes of one RUN line.
      flags: object providing verbose, function_signature, check_attributes
        and replace_function_regex.
      scrubber_args: extra arguments handed to the scrubber via do_scrub().
    """
    self._verbose = flags.verbose
    self._record_args = flags.function_signature
    self._check_attributes = flags.check_attributes
    self._scrubber_args = scrubber_args
    # Strip double-quotes if input was read by UTC_ARGS
    self._replace_function_regex = [
        regex.strip('"') for regex in flags.replace_function_regex]
    self._func_dict = {}
    self._func_order = {}
    self._global_var_dict = {}
    for run_entry in run_list:
      for prefix in run_entry[0]:
        self._func_dict[prefix] = {}
        self._func_order[prefix] = []
        self._global_var_dict[prefix] = {}

  def finish_and_get_func_dict(self):
    """Warn about every prefix that ended up unusable, then return the
    prefix -> function dictionary."""
    for prefix in self._get_failed_prefixes():
      warn('Prefix %s had conflicting output from different RUN lines for all functions' % (prefix,))
    return self._func_dict

  def func_order(self):
    """Return the prefix -> ordered function name list dictionary."""
    return self._func_order

  def global_var_dict(self):
    """Return the prefix -> global values dictionary."""
    return self._global_var_dict

  def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes):
    """Record the functions found in the output of one RUN line.

    Args:
      function_re: compiled regex with the named groups 'func' and 'body'
        (and 'attrs' when attribute checking is enabled); 'args_and_sig' and
        'analysis' are used when the regex defines them.
      scrubber: callable applied to each function body through do_scrub().
      raw_tool_output: the text printed by the tool.
      prefixes: the check prefixes of this RUN line.
    """
    build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
    # Pattern that matches capture groups in a replacement regex in leftmost
    # order. It never changes, so it is compiled once per call, and it is a
    # raw string so that '\(' is a regex escape, not an invalid string escape.
    group_regex = re.compile(r'\(.*?\)')
    for m in function_re.finditer(raw_tool_output):
      named_groups = m.groupdict()
      func = m.group('func')
      body = m.group('body')
      attrs = m.group('attrs') if self._check_attributes else ''
      # Determine if we print arguments, the opening brace, or nothing after the
      # function name
      if self._record_args and 'args_and_sig' in named_groups:
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
      elif 'args_and_sig' in named_groups:
        args_and_sig = '('
      else:
        args_and_sig = ''
      scrubbed_body = do_scrub(body, scrubber, self._scrubber_args,
                               extra=False)
      scrubbed_extra = do_scrub(body, scrubber, self._scrubber_args,
                                extra=True)
      if 'analysis' in named_groups:
        analysis = m.group('analysis')
        if analysis.lower() != 'cost model analysis':
          warn('Unsupported analysis mode: %r!' % (analysis,))
      if func.startswith('stress'):
        # We only use the last line of the function body for stress tests.
        scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
      if self._verbose:
        print('Processing function: ' + func, file=sys.stderr)
        for l in scrubbed_body.splitlines():
          print('  ' + l, file=sys.stderr)
      # NOTE(review): func and scrubbed_body may be rewritten by the regex
      # replacement below and the rewritten values carry over to the next
      # prefix, exactly as before this cleanup.
      for prefix in prefixes:
        if func in self._func_dict[prefix]:
          recorded = self._func_dict[prefix][func]
          if (recorded is None or
              str(recorded) != scrubbed_body or
              recorded.args_and_sig != args_and_sig or
              recorded.attrs != attrs):
            if (recorded is not None and
                recorded.is_same_except_arg_names(
                    scrubbed_extra, args_and_sig, attrs)):
              recorded.scrub = scrubbed_extra
              recorded.args_and_sig = args_and_sig
              continue
            else:
              # This means a previous RUN line produced a body for this function
              # that is different from the one produced by this current RUN line,
              # so the body can't be common across RUN lines. We use None to
              # indicate that.
              self._func_dict[prefix][func] = None
              continue

        # Replace function names matching the regex.
        for regex in self._replace_function_regex:
          # Replace function name with regex.
          match = re.match(regex, func)
          if match:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
            func = re.sub(func_repl, '{{' + func_repl + '}}', func)

          # Replace all calls to regex matching functions.
          for match in re.finditer(regex, scrubbed_body):
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
            # Substitute function call names that match the regex with the same
            # capture groups set.
            scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}', scrubbed_body)

        self._func_dict[prefix][func] = function_body(
            scrubbed_body, scrubbed_extra, args_and_sig, attrs)
        self._func_order[prefix].append(func)

  def _get_failed_prefixes(self):
    """Yield the prefixes that have recorded functions, all of them None."""
    # This returns the list of those prefixes that failed to match any function,
    # because there were conflicting bodies produced by different RUN lines, in
    # all instances of the prefix. Effectively, this prefix is unused and should
    # be removed.
    for prefix in self._func_dict:
      if (self._func_dict[prefix] and
          (not [fct for fct in self._func_dict[prefix]
                if self._func_dict[prefix][fct] is not None])):
        yield prefix
399
400
401##### Generator of LLVM IR CHECK lines
402
# An IR comment: a ';' with any whitespace before it, through the end of the
# line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
404
405# TODO: We should also derive check lines for global, debug, loop declarations, etc..
406
class NamelessValue:
    """Describes one kind of IR value that is turned into a FileCheck variable.

    Attributes:
        check_prefix: prefix of the generated FileCheck variable names.
        check_key: key used together with the value name to track values seen.
        ir_prefix: text that precedes the value where it is used, or None.
        ir_regexp: regex for the value name after ir_prefix, or None.
        global_ir_prefix: text that precedes the value in a line-anchored
            definition, or None.
        global_ir_prefix_regexp: regex for the name after global_ir_prefix,
            or None.
        global_ir_rhs_regexp: stored as given; not read in this part of the
            file.
        is_before_functions: stored as given; not read in this part of the
            file.
    """

    def __init__(self, check_prefix, check_key, ir_prefix, global_ir_prefix, global_ir_prefix_regexp,
                 ir_regexp, global_ir_rhs_regexp, is_before_functions):
        # How the value is named in the generated checks.
        self.check_prefix, self.check_key = check_prefix, check_key
        # How the value is recognized at a use.
        self.ir_prefix, self.ir_regexp = ir_prefix, ir_regexp
        # How the value is recognized at a line-anchored definition.
        self.global_ir_prefix = global_ir_prefix
        self.global_ir_prefix_regexp = global_ir_prefix_regexp
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
418
# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
# Columns, in NamelessValue argument order: check_prefix, check_key, ir_prefix,
# global_ir_prefix, global_ir_prefix_regexp, ir_regexp, global_ir_rhs_regexp,
# is_before_functions. Every entry sets either the ir_* pair or the
# global_ir_* pair, never both. The order of the entries matters: each entry
# contributes one capture group to IR_VALUE_RE, in this order.
nameless_values = [
    NamelessValue(r'TMP'  , '%' , r'%'           , None            , None                   , r'[\w$.-]+?' , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , r'#'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , None           , r'attributes #' , r'[0-9]+'              , None         , r'{[^}]*}'           , False) ,
    NamelessValue(r'GLOB' , '@' , r'@'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'GLOB' , '@' , None           , r'@'            , r'[a-zA-Z0-9_$"\\.-]+' , None         , r'.+'                , True)  ,
    NamelessValue(r'DBG'  , '!' , r'!dbg '       , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'TBAA' , '!' , r'!tbaa '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'RNG'  , '!' , r'!range '     , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'LOOP' , '!' , r'!llvm.loop ' , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , r'metadata '   , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , None           , r''             , r'![0-9]+'             , None         , r'(?:distinct |)!.*' , False) ,
]
434
def createOrRegexp(old, new):
    """Join two regex fragments with '|'.

    If `old` is empty, `new` is returned as is; otherwise if `new` is empty,
    `old` is returned as is.
    """
    if not old:
        return new
    return old + '|' + new if new else old
441
def createPrefixMatch(prefix_str, prefix_re):
    """Build '(?:<prefix_str>(<prefix_re>))': a non-capturing group holding
    the literal prefix followed by one capture group for the name.

    Returns '' when either argument is None.
    """
    if None in (prefix_str, prefix_re):
        return ''
    return '(?:{}({}))'.format(prefix_str, prefix_re)
446
# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
IR_VALUE_REGEXP_PREFIX = r'(\s*)'
IR_VALUE_REGEXP_STRING = r''
# Each nameless value adds exactly one alternative with one capture group, in
# table order. Alternatives built from the global_ir_* pair are anchored with
# '^'; the ones built from the ir_* pair are not.
for nameless_value in nameless_values:
    lcl_match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    glb_match = createPrefixMatch(nameless_value.global_ir_prefix, nameless_value.global_ir_prefix_regexp)
    # Exactly one of the two forms must be described by an entry.
    assert((lcl_match or glb_match) and not (lcl_match and glb_match))
    if lcl_match:
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, lcl_match)
    elif glb_match:
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, '^' + glb_match)
# A value ends at a comma, whitespace, a parenthesis, or the end of the string.
IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)'
IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# Constants for the group id of special matches: groups 3 and 4 belong to the
# first two entries of nameless_values (the '%' and '#' forms).
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
471
# Map an IR_VALUE_RE match to the index in nameless_values of the entry that
# produced it: a local value %..., a global @..., a debug number !dbg !..., etc.
def get_idx_from_ir_value_match(match):
    """Return the nameless_values index of the first per-kind group that took
    part in the match. Reports an error and returns 0 if none did."""
    first = first_nameless_group_in_ir_value_match
    # match.lastindex is the suffix group, so it is excluded from the scan.
    hit = next((group for group in range(first, match.lastindex)
                if match.group(group) is not None), None)
    if hit is None:
        error("Unable to identify the kind of IR value from the match!")
        return 0
    return hit - first
480
# Return the name captured for the value, i.e. the text of the group selected
# by get_idx_from_ir_value_match.
def get_name_from_ir_value_match(match):
    group_number = first_nameless_group_in_ir_value_match + get_idx_from_ir_value_match(match)
    return match.group(group_number)
484
# Return the nameless prefix we use for this kind of IR value, see also
# get_idx_from_ir_value_match
def get_nameless_check_prefix_from_ir_value_match(match):
    entry = nameless_values[get_idx_from_ir_value_match(match)]
    return entry.check_prefix
489
# Return the IR prefix and check prefix we use for this kind of IR value, e.g., (%, TMP) for locals,
# see also get_idx_from_ir_value_match
def get_ir_prefix_from_ir_value_match(match):
    entry = nameless_values[get_idx_from_ir_value_match(match)]
    use_prefix = entry.ir_prefix
    # Prefer the use-site prefix when the matched text actually starts with it.
    if use_prefix and match.group(0).strip().startswith(use_prefix):
        return use_prefix, entry.check_prefix
    return entry.global_ir_prefix, entry.check_prefix
497
def get_check_key_from_ir_value_match(match):
    """Return the check_key of the nameless value kind behind `match`."""
    return nameless_values[get_idx_from_ir_value_match(match)].check_key
501
# Return the IR regexp we use for this kind of IR value, e.g., [\w.-]+? for locals,
# see also get_idx_from_ir_value_match
def get_ir_prefix_from_ir_value_re_match(match):
    # for backwards compatibility we check locals with '.*'
    if is_local_def_ir_value_match(match):
        return '.*'
    entry = nameless_values[get_idx_from_ir_value_match(match)]
    matched_text = match.group(0).strip()
    if entry.ir_prefix and matched_text.startswith(entry.ir_prefix):
        return entry.ir_regexp
    return entry.global_ir_prefix_regexp
512
# Return true if this kind of IR value is "local", i.e. its ir_prefix is '%'.
def is_local_def_ir_value_match(match):
    entry = nameless_values[get_idx_from_ir_value_match(match)]
    return entry.ir_prefix == '%'
516
# Return true if this kind of IR value is "global", i.e. its entry in
# nameless_values defines a global_ir_prefix.
def is_global_scope_ir_value_match(match):
    entry = nameless_values[get_idx_from_ir_value_match(match)]
    return entry.global_ir_prefix is not None
520
# Return a truthy value if var looks like a scripted FileCheck name, i.e.
# check_prefix followed by digits (compared case-insensitively).
def may_clash_with_default_check_prefix_name(check_prefix, var):
  if not check_prefix:
    # An empty or missing prefix cannot clash; hand it back unchanged.
    return check_prefix
  pattern = r'^' + check_prefix + r'[0-9]+?$'
  return re.match(pattern, var, re.IGNORECASE)
524
# Create a FileCheck variable name based on an IR name.
def get_value_name(var, check_prefix):
  """Return the upper-case FileCheck variable name for the IR name `var`.

  '!' characters are removed. An all-digit name gets check_prefix prepended.
  Any other name that could clash with a scripted name gets the module-level
  _prefix_filecheck_ir_name prepended, if that is set. '.' and '-' are
  replaced by '_'.
  """
  name = var.replace('!', '')
  if name.isdigit():
    # This is a nameless value, prepend check_prefix.
    name = check_prefix + name
  elif may_clash_with_default_check_prefix_name(check_prefix, name) and _prefix_filecheck_ir_name:
    # This is a named value that clashes with the check_prefix.
    name = _prefix_filecheck_ir_name + name
  return name.replace('.', '_').replace('-', '_').upper()
539
# Create a FileCheck variable definition, [[NAME:regex]], for a matched value.
def get_value_definition(var, match):
  name = get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match))
  ir_prefix = get_ir_prefix_from_ir_value_match(match)[0]
  value_re = get_ir_prefix_from_ir_value_re_match(match)
  if is_local_def_ir_value_match(match):
    # for backwards compatibility we check locals with '.*', and the IR
    # prefix goes inside the variable definition.
    return '[[' + name + ':' + ir_prefix + value_re + ']]'
  # For every other kind the IR prefix stays outside the brackets.
  return ir_prefix + '[[' + name + ':' + value_re + ']]'
548
# Create a use, [[NAME]], of a FileCheck variable for a matched value.
def get_value_use(var, match, check_prefix):
  reference = '[[' + get_value_name(var, check_prefix) + ']]'
  if is_local_def_ir_value_match(match):
    return reference
  # Non-local values keep their IR prefix in front of the variable.
  return get_ir_prefix_from_ir_value_match(match)[0] + reference
555
# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
  """Rewrite `lines` in place so IR values become FileCheck variables.

  Args:
    lines: list of lines; each element is replaced by its rewritten form.
    is_analyze: when true only the comment scrubbing is applied and no value
      is replaced.
    vars_seen: set of (name, check_key) pairs for local values already
      defined; updated as new definitions are emitted.
    global_vars_seen: dict mapping (name, check_key) of non-local values to
      the check prefix they were defined with; updated likewise.

  Returns:
    The same `lines` list.
  """
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = get_name_from_ir_value_match(match)
    for nameless_value in nameless_values:
      if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
        # The option is spelled --prefix-filecheck-ir-name; the message used
        # to point at a name that does not exist.
        warn("Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
             " with scripted FileCheck name." % (var,))
    key = (var, get_check_key_from_ir_value_match(match))
    is_local_def = is_local_def_ir_value_match(match)
    if is_local_def and key in vars_seen:
      rv = get_value_use(var, match, get_nameless_check_prefix_from_ir_value_match(match))
    elif not is_local_def and key in global_vars_seen:
      rv = get_value_use(var, match, global_vars_seen[key])
    else:
      if is_local_def:
        vars_seen.add(key)
      else:
        global_vars_seen[key] = get_nameless_check_prefix_from_ir_value_match(match)
      rv = get_value_definition(var, match)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(match.lastindex)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # Ignore any comments, since the check lines will too.
    lines[i] = SCRUB_IR_COMMENT_RE.sub(r'', line)
    if not is_analyze:
      # It can happen that two matches are back-to-back and for some reason sub
      # will not replace both of them. For now we work around this by
      # substituting until there is no more match.
      changed = True
      while changed:
        (lines[i], changed) = IR_VALUE_RE.subn(transform_line_vars, lines[i], count=1)
  return lines
602
603
604def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze, global_vars_seen_dict):
605  # prefix_exclusions are prefixes we cannot use to print the function because it doesn't exist in run lines that use these prefixes as well.
606  prefix_exclusions = set()
607  printed_prefixes = []
608  for p in prefix_list:
609    checkprefixes = p[0]
610    # If not all checkprefixes of this run line produced the function we cannot check for it as it does not
611    # exist for this run line. A subset of the check prefixes might know about the function but only because
612    # other run lines created it.
613    if any(map(lambda checkprefix: func_name not in func_dict[checkprefix], checkprefixes)):
614        prefix_exclusions |= set(checkprefixes)
615        continue
616
617  # prefix_exclusions is constructed, we can now emit the output
618  for p in prefix_list:
619    global_vars_seen = {}
620    checkprefixes = p[0]
621    for checkprefix in checkprefixes:
622      if checkprefix in global_vars_seen_dict:
623        global_vars_seen.update(global_vars_seen_dict[checkprefix])
624      else:
625        global_vars_seen_dict[checkprefix] = {}
626      if checkprefix in printed_prefixes:
627        break
628
629      # Check if the prefix is excluded.
630      if checkprefix in prefix_exclusions:
631        continue
632
633      # If we do not have output for this prefix we skip it.
634      if not func_dict[checkprefix][func_name]:
635        continue
636
637      # Add some space between different check prefixes, but not after the last
638      # check line (before the test code).
639      if is_asm:
640        if len(printed_prefixes) != 0:
641          output_lines.append(comment_marker)
642
643      if checkprefix not in global_vars_seen_dict:
644          global_vars_seen_dict[checkprefix] = {}
645
646      global_vars_seen_before = [key for key in global_vars_seen.keys()]
647
648      vars_seen = set()
649      printed_prefixes.append(checkprefix)
650      attrs = str(func_dict[checkprefix][func_name].attrs)
651      attrs = '' if attrs == 'None' else attrs
652      if attrs:
653        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
654      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
655      args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0]
656      if '[[' in args_and_sig:
657        output_lines.append(check_label_format % (checkprefix, func_name, ''))
658        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
659      else:
660        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
661      func_body = str(func_dict[checkprefix][func_name]).splitlines()
662
663      # For ASM output, just emit the check lines.
664      if is_asm:
665        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
666        for func_line in func_body[1:]:
667          if func_line.strip() == '':
668            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
669          else:
670            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
671        break
672
673      # For IR output, change all defs to FileCheck variables, so we're immune
674      # to variable naming fashions.
675      func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen)
676
677      # This could be selectively enabled with an optional invocation argument.
678      # Disabled for now: better to check everything. Be safe rather than sorry.
679
680      # Handle the first line of the function body as a special case because
681      # it's often just noise (a useless asm comment or entry label).
682      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
683      #  is_blank_line = True
684      #else:
685      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
686      #  is_blank_line = False
687
688      is_blank_line = False
689
690      for func_line in func_body:
691        if func_line.strip() == '':
692          is_blank_line = True
693          continue
694        # Do not waste time checking IR comments.
695        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)
696
697        # Skip blank lines instead of checking them.
698        if is_blank_line:
699          output_lines.append('{} {}:       {}'.format(
700              comment_marker, checkprefix, func_line))
701        else:
702          output_lines.append('{} {}-NEXT:  {}'.format(
703              comment_marker, checkprefix, func_line))
704        is_blank_line = False
705
706      # Add space between different check prefixes and also before the first
707      # line of code in the test function.
708      output_lines.append(comment_marker)
709
      # Remember new global variables we have not seen before.
711      for key in global_vars_seen:
712          if key not in global_vars_seen_before:
713              global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
714      break
715
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig, global_vars_seen_dict):
  """Emit IR check lines for func_name by delegating to add_checks.

  The label line has the shape
  '<comment_marker> <prefix>-LABEL: <def-pattern>@<name><args>', where
  <def-pattern> is the FileCheck pattern 'define {{[^@]+}}' when function_sig
  is truthy and empty otherwise.  The three '%s' slots are left unexpanded
  here so that add_checks can fill them in.
  """
  if function_sig:
    signature_pattern = 'define {{[^@]+}}'
  else:
    signature_pattern = ''
  label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, signature_pattern)
  # NOTE(review): preserve_names is forwarded in the same positional slot
  # where add_analyze_checks passes a literal True -- confirm against the
  # add_checks signature that this is the intended parameter.
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             label_format, False, preserve_names, global_vars_seen_dict)
723
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Emit analysis check lines for func_name by delegating to add_checks.

  The label line quotes the function name:
  "<comment_marker> <prefix>-LABEL: '<name><args>'".  A fresh, empty
  dictionary of seen global variables is used for every call, so nothing is
  carried over between functions.
  """
  label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
  fresh_globals_seen = {}
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             label_format, False, True, fresh_globals_seen)
729
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
  """Record the global value definitions found in raw_tool_output.

  For every entry of nameless_values that has a global IR prefix, all lines of
  raw_tool_output of the form '<lhs> = <rhs>' (as described by the entry's
  regular expressions) are collected and stored in
  glob_val_dict[prefix][entry.check_prefix] for each prefix in prefixes.

  Prefixes whose glob_val_dict entry is None are left untouched.  When a
  prefix already holds a different list for the same kind of value, the entry
  is reset to None, except for the last prefix in prefixes, where a warning
  is issued and the new list overwrites the old one.
  """
  for nameless_value in nameless_values:
    ir_prefix = nameless_value.global_ir_prefix
    if ir_prefix is None:
      continue

    definition_re = re.compile(
        r'^' + ir_prefix + nameless_value.global_ir_prefix_regexp
        + r'\s=\s' + nameless_value.global_ir_rhs_regexp + r'$',
        flags=(re.M))
    matched_lines = [m.group(0) for m in definition_re.finditer(raw_tool_output)]

    value_kind = nameless_value.check_prefix
    for prefix in prefixes:
      recorded = glob_val_dict[prefix]
      if recorded is None:
        continue
      if value_kind in recorded:
        if matched_lines == recorded[value_kind]:
          continue
        if prefix != prefixes[-1]:
          # Conflicting output under a shared prefix: drop the entry.
          recorded[value_kind] = None
          continue
        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
      recorded[value_kind] = matched_lines
756
def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines, global_vars_seen_dict, is_analyze, is_before_functions):
  """Append check lines for recorded global values to output_lines.

  Only entries of nameless_values that have a global IR prefix and whose
  is_before_functions attribute equals the is_before_functions argument are
  considered.  For each such entry and each item p of prefix_list, the check
  prefixes in p[0] are tried in order and the first one that has recorded
  lines in glob_val_dict is printed: a SEPARATOR line followed by one
  '<comment_marker> <prefix>: <generalized line>' per recorded line.  Each
  (check prefix, value kind) pair is printed at most once; meeting a pair that
  was already printed stops the search through the remaining prefixes of p.

  global_vars_seen_dict is updated in place: every check prefix visited gets
  an entry, and global variables first seen while printing are stored under
  the prefix they were printed for.  A closing SEPARATOR is appended when
  anything was printed.
  """
  printed_prefixes = set()
  for nameless_value in nameless_values:
    if nameless_value.global_ir_prefix is None:
      continue
    if nameless_value.is_before_functions != is_before_functions:
      continue
    for p in prefix_list:
      # Accumulates the globals known for the prefixes of this item so far.
      global_vars_seen = {}
      checkprefixes = p[0]
      for checkprefix in checkprefixes:
        if checkprefix in global_vars_seen_dict:
          global_vars_seen.update(global_vars_seen_dict[checkprefix])
        else:
          global_vars_seen_dict[checkprefix] = {}
        if (checkprefix, nameless_value.check_prefix) in printed_prefixes:
          break
        if not glob_val_dict[checkprefix]:
          continue
        if nameless_value.check_prefix not in glob_val_dict[checkprefix]:
          continue
        if not glob_val_dict[checkprefix][nameless_value.check_prefix]:
          continue

        output_lines.append(SEPARATOR)

        # Snapshot as a set so the membership tests below are constant time.
        global_vars_seen_before = set(global_vars_seen)
        for line in glob_val_dict[checkprefix][nameless_value.check_prefix]:
          tmp = generalize_check_lines([line], is_analyze, set(), global_vars_seen)
          check_line = '%s %s: %s' % (comment_marker, checkprefix, tmp[0])
          output_lines.append(check_line)
        printed_prefixes.add((checkprefix, nameless_value.check_prefix))

        # Remember new global variables we have not seen before.
        for key in global_vars_seen:
          if key not in global_vars_seen_before:
            global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
        break

  if printed_prefixes:
    output_lines.append(SEPARATOR)
798
799
def check_prefix(prefix):
  """Warn when prefix does not match PREFIX_RE.

  If the rejected prefix contains a comma, the warning also suggests the
  '--check-prefixes=' spelling.  Nothing is returned.
  """
  if PREFIX_RE.match(prefix):
    return
  # Expand the format first and append the hint afterwards: the hint embeds
  # the user-supplied prefix, which must not be interpreted as part of the
  # format string (a '%' in it would otherwise break the formatting).
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
807
808
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes named in a FileCheck command line.

  fc_cmd is split on whitespace.  A token containing 'check-prefix=' has the
  text after its first '=' passed to check_prefix.  A token containing
  'check-prefixes=' has that text split on ',' and every item passed to
  check_prefix; an item occurring more than once additionally triggers a
  warning each time it is encountered.
  """
  for token in fc_cmd.split():
    if "check-prefix=" in token:
      check_prefix(token.split('=', 1)[1])
      continue
    if "check-prefixes=" not in token:
      continue
    names = token.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
821
822
def get_autogennote_suffix(parser, args):
  """Build the option suffix recorded in the autogenerated-note line.

  Every action of parser whose destination is present in args, is not one of
  the ignored destinations below, and holds a non-default value contributes
  its first option string, followed by its value when the action takes a
  parameter.  Values of nargs='+' options are each wrapped in double quotes.

  Returns ' <UTC_ARGS_KEY> <options>' or the empty string when no option
  qualifies.
  """
  # Parameters such as paths to the binary or the list of tests are ignored.
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  pieces = []
  for action in parser._actions:
    dest = action.dest
    # Options such as --help are not included in args.
    if not hasattr(args, dest) or dest in ignored_dests:
      continue
    value = getattr(args, dest)
    stores_constant = action.const is not None  # usually True/False
    # With boolean --foo/--no-foo pairs only the action whose constant was
    # actually stored is reported.
    if stores_constant and value != action.const:
      continue
    if parser.get_default(dest) == value:
      continue  # Don't add default values
    pieces.append(action.option_strings[0])
    if not stores_constant:  # action takes a parameter
      if action.nargs == '+':
        value = ' '.join('"' + v.strip('"') + '"' for v in value)
      pieces.append('%s' % value)
  if not pieces:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(pieces))
848
849
def check_for_command(line, parser, args, argv, argparse_callback):
  """Apply options embedded in line, if it matches UTC_ARGS_CMD.

  The non-empty space-separated words of the match's 'cmd' group are appended
  to argv (in place), and the arguments are re-parsed from argv with the
  entries of args.tests left out.  argparse_callback, when not None, is then
  called with the newly parsed arguments.

  Returns the (possibly re-parsed) args together with argv.
  """
  match = UTC_ARGS_CMD.match(line)
  if not match:
    return args, argv

  argv.extend(word for word in match.group('cmd').strip().split(' ') if word)
  args = parser.parse_args([arg for arg in argv if arg not in args.tests])
  if argparse_callback is not None:
    argparse_callback(args)
  return args, argv
860
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
  """Look up an option for a test, optionally scanning the test's lines.

  get_arg_to_check is first applied to test_info.args.  If that yields a
  falsy value and is_global is truthy, it is applied to the args of each line
  from test_info.ro_iterlines() until a truthy value is found.  This covers
  "global" options specified via UTC_ARGS: if one exists anywhere in the
  test, it applies to everything.

  A warning naming arg_string is printed to stderr when the option is found
  on or after the first line that is neither blank nor starts with ';'.

  Returns the first truthy value found, otherwise the last falsy value
  computed.
  """
  result = get_arg_to_check(test_info.args)
  if result or not is_global:
    return result

  saw_line = False
  for line_info in test_info.ro_iterlines():
    line = line_info.line
    if not line.startswith(';') and line.strip() != '':
      saw_line = True
    result = get_arg_to_check(line_info.args)
    if not result:
      continue
    if saw_line:
      # We saw the option after already reading some test input lines.
      # Warn about it.
      print('WARNING: Found {} in line following test start: '.format(arg_string)
            + line, file=sys.stderr)
      print('WARNING: Consider moving {} to top of file'.format(arg_string),
            file=sys.stderr)
    break
  return result
883
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
  """Copy the input lines of a test to output_lines, dropping old checks.

  Lines come from test_info.iterlines(output_lines).  A line is dropped when,
  ignoring surrounding whitespace, it equals comment_string or SEPARATOR, or
  when it starts with comment_string (after leading whitespace), matches
  CHECK_RE, and the first group of that match is in prefix_set.  Every other
  line is appended to output_lines with trailing newlines removed.
  """
  for input_line_info in test_info.iterlines(output_lines):
    line = input_line_info.line
    stripped = line.strip()
    if stripped == comment_string or stripped == SEPARATOR:
      continue
    if line.lstrip().startswith(comment_string):
      m = CHECK_RE.match(line)
      if m and m.group(1) in prefix_set:
        continue
    output_lines.append(line.rstrip('\n'))
897
def add_checks_at_end(output_lines, prefix_list, func_order,
                      comment_string, check_generator):
  """Append checks for the functions of each prefix, one prefix at a time.

  prefix_list items are indexed as (list of prefixes, tool args).  For every
  prefix in every item, each function in func_order[prefix] is handed to
  check_generator as
  check_generator(output_lines, [([prefix], tool_args)], func).
  comment_string is appended to output_lines before every function except
  the very first one processed.
  """
  # Functions handled so far; non-empty means a separator line is needed.
  added = set()
  for prefix in prefix_list:
    prefixes = prefix[0]
    tool_args = prefix[1]
    # The inner loop reuses the name 'prefix'; the outer item is no longer
    # needed once its two fields have been read above.
    for prefix in prefixes:
      for func in func_order[prefix]:
        if added:
          output_lines.append(comment_string)
        added.add(func)

        # The add_*_checks routines expect a run list whose items are
        # tuples that have a list of prefixes as their first element and
        # tool command args string as their second element.  They output
        # checks for each prefix in the list of prefixes.  By doing so, it
        # implicitly assumes that for each function every run line will
        # generate something for that function.  That is not the case for
        # generated functions as some run lines might not generate them
        # (e.g. -fopenmp vs. no -fopenmp).
        #
        # Therefore, pass just the prefix we're interested in.  This has
        # the effect of generating all of the checks for functions of a
        # single prefix before moving on to the next prefix.  So checks
        # are ordered by prefix instead of by function as in "normal"
        # mode.
        check_generator(output_lines,
                        [([prefix], tool_args)],
                        func)
927