xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision a28177035b16e8884107be686a805f82e8f35840)
1from __future__ import print_function
2
3import copy
4import glob
5import os
6import re
7import subprocess
8import sys
9
10##### Common utilities for update_*test_checks.py
11
12
# Module-wide settings shared by the helpers in this file.
# parse_commandline_args() overwrites _verbose and the two regex lists below;
# TestInfo.__init__ overwrites _prefix_filecheck_ir_name when the
# --prefix-filecheck-ir-name option is non-empty.
_verbose = False
_prefix_filecheck_ir_name = ''
# Defaults for the globals that parse_commandline_args() assigns. Without
# them, code that reads these lists (e.g. generalize_check_lines) raises
# NameError when parse_commandline_args() was never called.
_global_value_regex = []
_global_hex_value_regex = []
15
def parse_commandline_args(parser):
  """Add the options common to all update scripts to `parser` and parse.

  The parsed values of --verbose, --global-value-regex and
  --global-hex-value-regex are additionally stored in the module globals
  _verbose, _global_value_regex and _global_hex_value_regex.

  Returns the namespace produced by parser.parse_args().
  """
  global _verbose, _global_value_regex, _global_hex_value_regex
  add_option = parser.add_argument

  add_option('--include-generated-funcs', action='store_true',
             help='Output checks for functions not in source')
  add_option('-v', '--verbose', action='store_true',
             help='Show verbose output')
  add_option('-u', '--update-only', action='store_true',
             help='Only update test if it was already autogened')
  add_option('--force-update', action='store_true',
             help='Update test even if it was autogened by a different script')
  # --enable and --disable both write to the same 'enabled' destination.
  add_option('--enable', action='store_true', dest='enabled', default=True,
             help='Activate CHECK line generation from this point forward')
  add_option('--disable', action='store_false', dest='enabled',
             help='Deactivate CHECK line generation from this point forward')
  add_option('--replace-value-regex', nargs='+', default=[],
             help='List of regular expressions to replace matching value names')
  add_option('--prefix-filecheck-ir-name', default='',
             help='Add a prefix to FileCheck IR value names to avoid conflicts with scripted names')
  add_option('--global-value-regex', nargs='+', default=[],
             help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)')
  add_option('--global-hex-value-regex', nargs='+', default=[],
             help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives')

  parsed = parser.parse_args()
  _verbose = parsed.verbose
  _global_value_regex = parsed.global_value_regex
  _global_hex_value_regex = parsed.global_hex_value_regex
  return parsed
43
44
class InputLineInfo(object):
  """Plain record: one input line plus the options associated with it."""

  def __init__(self, line, line_number, args, argv):
    # The line itself and the position the caller assigned to it.
    self.line, self.line_number = line, line_number
    # Parsed options and the raw argument list supplied for this line.
    self.args, self.argv = args, argv
51
52
class TestInfo(object):
  """Per-test state: path, input lines, RUN lines, options and note text."""

  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix, argparse_callback):
    global _prefix_filecheck_ir_name
    self.parser = parser
    self.argparse_callback = argparse_callback
    self.path = test
    self.args = args
    # A non-empty --prefix-filecheck-ir-name is published module-wide.
    if args.prefix_filecheck_ir_name:
      _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    # Without an explicit comment prefix, '.mir' files use '#', others ';'.
    if comment_prefix is None:
      comment_prefix = '#' if self.path.endswith('.mir') else ';'
    self.comment_prefix = comment_prefix
    self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
    self.test_autogenerated_note = (
        self.autogenerated_note_prefix + script_name +
        get_autogennote_suffix(parser, self.args))

  def ro_iterlines(self):
    """Yield an InputLineInfo for every input line without modifying self.

    check_for_command() is run on each line with the current self.args and
    self.argv; its result is attached to the yielded record.
    """
    for index, text in enumerate(self.input_lines):
      line_args, line_argv = check_for_command(
          text, self.parser, self.args, self.argv, self.argparse_callback)
      yield InputLineInfo(text, index, line_args, line_argv)

  def iterlines(self, output_lines):
    """Yield the lines to process, updating self.args/self.argv as it goes.

    The autogenerated note is appended to `output_lines` first. Lines that
    start with the note prefix are dropped. Lines seen while the 'enabled'
    option is false are appended to `output_lines` and not yielded.
    """
    output_lines.append(self.test_autogenerated_note)
    for info in self.ro_iterlines():
      # Discard any previous script advertising.
      if info.line.startswith(self.autogenerated_note_prefix):
        continue
      self.args, self.argv = info.args, info.argv
      if self.args.enabled:
        yield info
      else:
        output_lines.append(info.line)
94      yield line_info
95
def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
  """Yield a TestInfo for every file matching `test_patterns` to be updated.

  A pattern that matches no file produces a warning and is skipped. A test
  whose first line carries the autogeneration note of a different script is
  skipped unless --force-update is set. A test without such a note is skipped
  when --update-only is set.
  """
  for pattern in test_patterns:
    # On Windows we must expand the patterns ourselves.
    matched_paths = glob.glob(pattern)
    if not matched_paths:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for test in matched_paths:
      with open(test) as test_file:
        input_lines = [text.rstrip() for text in test_file]
      args = parser.parse_args()
      if argparse_callback is not None:
        argparse_callback(args)
      argv = list(sys.argv)
      first_line = input_lines[0] if input_lines else ""
      if UTC_ADVERT not in first_line:
        if args.update_only:
          warn("Skipping test which isn't autogenerated: " + test)
          continue
      else:
        if script_name not in first_line and not args.force_update:
          warn("Skipping test which wasn't autogenerated by " + script_name, test)
          continue
        # Options recorded on the note line are passed to check_for_command,
        # whose result replaces args/argv.
        args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
      yield TestInfo(test, parser, script_name, input_lines, args, argv,
                     comment_prefix, argparse_callback)
121                     comment_prefix, argparse_callback)
122
123
def should_add_line_to_output(input_line, prefix_set, skip_global_checks = False, comment_marker = ';'):
  """Return True if `input_line` should be copied to the regenerated test.

  input_line: one line of the existing test file.
  prefix_set: check prefixes whose CHECK lines are being regenerated.
  skip_global_checks: when true, bare comment-marker lines are kept, and a
    CHECK line for a prefix in `prefix_set` is dropped only if it contains
    a '[[' sequence.
  comment_marker: comment leader of the test file's language.

  Blank lines are deliberately kept.
  """
  stripped = input_line.strip()
  # Skip any blank comment lines in the IR.
  if not skip_global_checks and stripped == comment_marker:
    return False
  # Skip a special double comment line we use as a separator.
  if stripped == comment_marker + SEPARATOR:
    return False
  # And skip any CHECK lines. We're building our own.
  m = CHECK_RE.match(input_line)
  if m and m.group(1) in prefix_set:
    if skip_global_checks:
      # A plain substring test replaces the former per-call
      # re.compile('\[\[') (an invalid escape in a non-raw string); the
      # result is the same.
      return '[[' not in input_line
    return False

  return True
143
144# Perform lit-like substitutions
def getSubstitutions(sourcepath):
  """Return the (pattern, replacement) pairs for `sourcepath`, in order.

  '%s' maps to the path itself, '%S' and '%p' to its directory name, and
  '%{pathsep}' to os.pathsep.
  """
  sourcedir = os.path.dirname(sourcepath)
  patterns = ('%s', '%S', '%p', '%{pathsep}')
  replacements = (sourcepath, sourcedir, sourcedir, os.pathsep)
  return list(zip(patterns, replacements))
151
def applySubstitutions(s, substitutions):
  """Apply each (pattern, replacement) pair to `s` in the order given.

  Every pair is applied to the result of the previous one, so an earlier
  replacement may introduce text that a later pattern matches.
  """
  result = s
  for pattern, replacement in substitutions:
    result = result.replace(pattern, replacement)
  return result
155  return s
156
157# Invoke the tool that is being tested.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
  """Run the tool under test with `ir` as stdin and return its stdout.

  exe: tool executable (used as argv[0], or as a shell command prefix).
  cmd_args: either a list of arguments (run without a shell) or a single
    string (run through the shell). Lit-like substitutions from
    getSubstitutions(ir) are applied to it.
  ir: path of the input file.
  preprocess_cmd: optional shell command string; when given, its stdout is
    fed to the tool instead of the contents of `ir`.
  verbose: when true, the pre-processing command is reported on stderr.

  Returns the tool's stdout (decoded on Python 3) with '\\r\\n' replaced by
  '\\n'. subprocess.CalledProcessError from check_output propagates.
  """
  preprocess_proc = None
  with open(ir) as ir_file:
    substitutions = getSubstitutions(ir)
    tool_stdin = ir_file

    # TODO Remove the str form which is used by update_test_checks.py and
    # update_llc_test_checks.py
    # The safer list form is used by update_cc_test_checks.py
    if preprocess_cmd:
      # Allow pre-processing the IR file (e.g. using sed):
      assert isinstance(preprocess_cmd, str)  # TODO: use a list instead of using shell
      preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
      if verbose:
        print('Pre-processing input file: ', ir, " with command '",
              preprocess_cmd, "'", sep="", file=sys.stderr)
      # Python 2.7 doesn't have subprocess.DEVNULL:
      with open(os.devnull, 'w') as devnull:
        preprocess_proc = subprocess.Popen(preprocess_cmd, shell=True,
                                           stdin=devnull,
                                           stdout=subprocess.PIPE)
      tool_stdin = preprocess_proc.stdout

    try:
      if isinstance(cmd_args, list):
        args = [applySubstitutions(a, substitutions) for a in cmd_args]
        stdout = subprocess.check_output([exe] + args, stdin=tool_stdin)
      else:
        stdout = subprocess.check_output(exe + ' ' + applySubstitutions(cmd_args, substitutions),
                                         shell=True, stdin=tool_stdin)
    finally:
      # Release the pre-processing pipe and reap the child so that neither
      # the file descriptor nor the process outlives this call, even when
      # the tool fails. Closing the pipe first lets a still-writing
      # pre-processor terminate instead of blocking wait().
      if preprocess_proc is not None:
        preprocess_proc.stdout.close()
        preprocess_proc.wait()
    if sys.version_info[0] > 2:
      stdout = stdout.decode()
  # Normalize Windows CRLF line endings to Unix LF.
  return stdout.replace('\r\n', '\n')
188
189##### LLVM IR parser
# A RUN line behind a '//', ';' or '#' comment marker; group 1 is the command.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A -check-prefix / --check-prefixes option; group 1 is its value.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A string consisting only of letters, digits, '_' and '-'.
PREFIX_RE = re.compile(r'^[a-zA-Z0-9_-]+$')
# A check line behind a comment marker; group 1 is the text before the
# optional -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix and the ':'.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')

UTC_ARGS_KEY = 'UTC_ARGS:'
# Text following UTC_ARGS_KEY on a line, available as group 'cmd'. Both
# fragments are raw strings so that '\s' is not an invalid string escape.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '
198
# Matches one 'define ... @name(...) { ... }' function, optionally preceded
# by a '; Function Attrs: ...' comment. Named groups: 'attrs', 'func' (the
# name after '@'), 'args_and_sig' (from '(' up to and including '{') and
# 'body' (the text up to a line consisting of '}').
OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Matches "'<analysis>' for function '<func>':" followed by the remaining
# text. Named groups: 'analysis', 'func' and 'body'.
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# Start of a 'define' line; group 1 is the function name without the
# optional surrounding double quotes.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# A 'target triple = "..."' line; group 1 is the quoted value.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# '-mtriple' / '-march' followed by '=' or a space; group 1 is the value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')

# Leading whitespace at the start of the string (no re.M flag).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# Runs of spaces/tabs that do not begin at the start of a line; scrub_body
# replaces each such run with a single space.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias of SCRUB_TRAILING_WHITESPACE_RE; this is the name scrub_body uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Like the trailing-whitespace pattern, but also covers '#<digits>' tokens
# at the end of a line.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A '# kill:' comment line at the start of the string, including its newline.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# '# =>This Inner Loop Header:' and '# in Loop:' comments to end of line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
# A '#' at the end of a line (with any spaces/tabs before it) that follows a
# non-whitespace character.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)

# Appended to the comment marker to form the separator line that
# should_add_line_to_output drops.
SEPARATOR = '.'
225
def error(msg, test_file=None):
  """Print 'ERROR: <msg>' to stderr, with ': <test_file>' appended if given.

  Only reports; it does not raise or exit.
  """
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
230
def warn(msg, test_file=None):
  """Print 'WARNING: <msg>' to stderr, with ': <test_file>' appended if given."""
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
235
def debug(*args, **kwargs):
  """print() wrapper that is silent unless the module-level _verbose is set.

  Output goes to sys.stderr unless the caller passes an explicit `file`.
  """
  if not _verbose:
    return
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  print(*args, **kwargs)
242
def find_run_lines(test, lines):
  """Return the RUN commands found in `lines`, with continuations merged.

  A RUN line ending in a backslash is joined, with a single space, to the
  next RUN line. `test` is only used in debug output.
  """
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for text in lines:
    run_match = RUN_LINE_RE.match(text)
    if not run_match:
      continue
    command = run_match.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      # The previous RUN line asked for a continuation.
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + command
    else:
      run_lines.append(command)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for command in run_lines:
    debug('  RUN: {}'.format(command))
  return run_lines
257
def scrub_body(body):
  """Normalize whitespace in `body` and return the result.

  Whitespace runs matched by SCRUB_WHITESPACE_RE become single spaces, tabs
  are expanded with a tab size of 2, and trailing whitespace on each line
  is removed.
  """
  # Collapse whitespace runs, but leave the leading whitespace in place.
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  # Expand the tabs used for indentation.
  expanded = str.expandtabs(collapsed, 2)
  # Strip trailing whitespace.
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', expanded)
267
def do_scrub(body, scrubber, scrubber_args, extra):
  """Call `scrubber(body, *scrubber_args)` with extra_scrub set to `extra`.

  When `scrubber_args` is non-empty, a deep copy is made and `extra` is
  stored as `extra_scrub` on its first element, leaving the caller's
  arguments untouched. With empty arguments `extra` is ignored.
  """
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
274
275# Build up a dictionary of all the function bodies.
class function_body(object):
  """Scrubbed output recorded for one function: body, signature, attributes."""

  def __init__(self, string, extra, args_and_sig, attrs):
    self.scrub = string          # body text; also what str(self) returns
    self.extrascrub = extra      # body text from the extra-scrub pass
    self.args_and_sig = args_and_sig
    self.attrs = attrs

  def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs, is_asm):
    """Compare this record with another function's data, ignoring arg names.

    Returns False when the attributes differ or when the signatures differ
    after value names are removed. For asm the extra-scrubbed bodies must
    then be equal as-is. Otherwise they are compared after dropping every
    value whose name was collected from the signatures and after removing
    IR comments.
    """
    if self.attrs != attrs:
      return False

    seen_arg_names = set()

    def strip_and_collect(match):
      # Remember the name, then keep only prefix, attribute and suffix.
      seen_arg_names.add(match.group(variable_group_in_ir_value_match))
      attr_text = match.group(attribute_group_in_ir_value_match) or ''
      return match.group(1) + attr_text + match.group(match.lastindex)

    def strip_if_collected(match):
      name = match.group(variable_group_in_ir_value_match)
      kept = match.group(1)
      if name is None or name not in seen_arg_names:
        kept = kept + match.group(2)
      return kept + match.group(match.lastindex)

    own_signature = IR_VALUE_RE.sub(strip_and_collect, self.args_and_sig)
    other_signature = IR_VALUE_RE.sub(strip_and_collect, args_and_sig)
    if own_signature != other_signature:
      return False

    if is_asm:
      # Check without replacements, the replacements are not applied to the
      # body for asm checks.
      return self.extrascrub == extrascrub

    own_body = IR_VALUE_RE.sub(strip_if_collected, self.extrascrub)
    other_body = IR_VALUE_RE.sub(strip_if_collected, extrascrub)
    own_body = SCRUB_IR_COMMENT_RE.sub(r'', own_body)
    other_body = SCRUB_IR_COMMENT_RE.sub(r'', other_body)
    return own_body == other_body

  def __str__(self):
    return self.scrub
313    return self.scrub
314
class FunctionTestBuilder:
  """Collects, per check prefix, the scrubbed body of every function seen.

  For each prefix, _func_dict maps a function name to a function_body, or
  to None once different RUN lines produced conflicting output for it.
  _func_order records the names in the order they were stored.
  """

  def __init__(self, run_list, flags, scrubber_args, path):
    """Set up empty per-prefix tables.

    run_list: sequence whose entries hold the list of check prefixes at
      index 0.
    flags: object providing verbose, function_signature, check_attributes
      and replace_value_regex.
    scrubber_args: extra arguments forwarded to the scrubber via do_scrub.
    path: test path, used in warnings only.
    """
    self._verbose = flags.verbose
    self._record_args = flags.function_signature
    self._check_attributes = flags.check_attributes
    self._scrubber_args = scrubber_args
    self._path = path
    # Strip double-quotes if input was read by UTC_ARGS
    self._replace_value_regex = [regex.strip('"') for regex in flags.replace_value_regex]
    self._func_dict = {}
    self._func_order = {}
    self._global_var_dict = {}
    for run_entry in run_list:
      for prefix in run_entry[0]:
        self._func_dict.update({prefix:dict()})
        self._func_order.update({prefix: []})
        self._global_var_dict.update({prefix:dict()})

  def finish_and_get_func_dict(self):
    """Warn about every prefix that is unusable and return the func dict."""
    for prefix in self._get_failed_prefixes():
      warn('Prefix %s had conflicting output from different RUN lines for all functions in test %s' % (prefix,self._path,))
    return self._func_dict

  def func_order(self):
    return self._func_order

  def global_var_dict(self):
    return self._global_var_dict

  def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes, is_asm):
    """Record the functions found in the output of one RUN line.

    function_re: compiled regex with at least the groups 'func' and 'body'
      (and 'attrs' when attribute checking is enabled).
    scrubber: callable applied to each body through do_scrub.
    raw_tool_output: complete tool output for the RUN line.
    prefixes: check prefixes of the RUN line.
    is_asm: forwarded to function_body.is_same_except_arg_names.
    """
    build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
    # Pattern that matches capture groups in a replace-value regex in
    # leftmost order. Compiled once per call instead of once per regex.
    group_regex = re.compile(r'\(.*?\)')
    for m in function_re.finditer(raw_tool_output):
      func = m.group('func')
      body = m.group('body')
      attrs = m.group('attrs') if self._check_attributes else ''
      # Determine if we print arguments, the opening brace, or nothing after the
      # function name
      if self._record_args and 'args_and_sig' in m.groupdict():
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
      elif 'args_and_sig' in m.groupdict():
        args_and_sig = '('
      else:
        args_and_sig = ''
      scrubbed_body = do_scrub(body, scrubber, self._scrubber_args,
                               extra=False)
      scrubbed_extra = do_scrub(body, scrubber, self._scrubber_args,
                                extra=True)
      if 'analysis' in m.groupdict():
        analysis = m.group('analysis')
        if analysis.lower() != 'cost model analysis':
          warn('Unsupported analysis mode: %r!' % (analysis,))
      if func.startswith('stress'):
        # We only use the last line of the function body for stress tests.
        scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
      if self._verbose:
        print('Processing function: ' + func, file=sys.stderr)
        for l in scrubbed_body.splitlines():
          print('  ' + l, file=sys.stderr)
      for prefix in prefixes:
        # Replace function names matching the regex.
        for regex in self._replace_value_regex:
          # Replace function name with regex.
          match = re.match(regex, func)
          if match:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
            func = re.sub(func_repl, '{{' + func_repl + '}}', func)

          # Replace all calls to regex matching functions.
          matches = re.finditer(regex, scrubbed_body)
          for match in matches:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = group_regex.sub(re.escape(g), func_repl, count=1)
            # Substitute function call names that match the regex with the same
            # capture groups set.
            scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}',
                                   scrubbed_body)

        if func in self._func_dict[prefix]:
          existing = self._func_dict[prefix][func]
          if (existing is None or
              str(existing) != scrubbed_body or
              existing.args_and_sig != args_and_sig or
              existing.attrs != attrs):
            if (existing is not None and
                existing.is_same_except_arg_names(
                    scrubbed_extra,
                    args_and_sig,
                    attrs,
                    is_asm)):
              existing.scrub = scrubbed_extra
              existing.args_and_sig = args_and_sig
              continue
            else:
              # This means a previous RUN line produced a body for this function
              # that is different from the one produced by this current RUN line,
              # so the body can't be common across RUN lines. We use None to
              # indicate that.
              self._func_dict[prefix][func] = None
              continue

        self._func_dict[prefix][func] = function_body(
            scrubbed_body, scrubbed_extra, args_and_sig, attrs)
        self._func_order[prefix].append(func)

  def _get_failed_prefixes(self):
    # This returns the list of those prefixes that failed to match any function,
    # because there were conflicting bodies produced by different RUN lines, in
    # all instances of the prefix. Effectively, this prefix is unused and should
    # be removed.
    for prefix in self._func_dict:
      if (self._func_dict[prefix] and
          (not [fct for fct in self._func_dict[prefix]
                if self._func_dict[prefix][fct] is not None])):
        yield prefix
436        yield prefix
437
438
439##### Generator of LLVM IR CHECK lines
440
# Matches a ';' comment up to the end of its line, together with any
# whitespace directly before the ';'.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
442
# TODO: We should also derive check lines for global, debug, loop declarations, etc.
444
class NamelessValue:
    """Describes one kind of IR value and how check lines refer to it."""

    def __init__(self, check_prefix, check_key, ir_prefix, global_ir_prefix, global_ir_prefix_regexp,
                 ir_regexp, global_ir_rhs_regexp, is_before_functions):
        # Name prefix and key used on the FileCheck side.
        self.check_prefix, self.check_key = check_prefix, check_key
        # Prefix and name regexp of the value at a use site in the IR.
        self.ir_prefix, self.ir_regexp = ir_prefix, ir_regexp
        # Prefix, name regexp and right-hand-side regexp of a definition
        # line for the value.
        self.global_ir_prefix = global_ir_prefix
        self.global_ir_prefix_regexp = global_ir_prefix_regexp
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
456
457# Description of the different "unnamed" values we match in the IR, e.g.,
458# (local) ssa values, (debug) metadata, etc.
# Columns, in NamelessValue constructor order:
#   check_prefix, check_key, ir_prefix, global_ir_prefix,
#   global_ir_prefix_regexp, ir_regexp, global_ir_rhs_regexp,
#   is_before_functions
# Each row fills in either the (ir_prefix, ir_regexp) pair or the
# (global_ir_prefix, global_ir_prefix_regexp) pair, never both; the code
# that builds IR_VALUE_RE asserts this. Row order is significant:
# get_idx_from_ir_value_match maps a capture group of IR_VALUE_RE back to a
# row by position.
nameless_values = [
    NamelessValue(r'TMP'  , '%' , r'%'           , None            , None                   , r'[\w$.-]+?' , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , r'#'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , None           , r'attributes #' , r'[0-9]+'              , None         , r'{[^}]*}'           , False) ,
    NamelessValue(r'GLOB' , '@' , r'@'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'GLOB' , '@' , None           , r'@'            , r'[a-zA-Z0-9_$"\\.-]+' , None         , r'.+'                , True)  ,
    NamelessValue(r'DBG'  , '!' , r'!dbg '       , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'PROF' , '!' , r'!prof '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'TBAA' , '!' , r'!tbaa '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'RNG'  , '!' , r'!range '     , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'LOOP' , '!' , r'!llvm.loop ' , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , r'metadata '   , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , None           , r''             , r'![0-9]+'             , None         , r'(?:distinct |)!.*' , False) ,
]
473
def createOrRegexp(old, new):
    """Join two regexp alternatives with '|', skipping an empty side.

    If `old` is empty `new` is returned unchanged, and vice versa.
    """
    if old and new:
        return old + '|' + new
    return old if old else new
480
def createPrefixMatch(prefix_str, prefix_re):
    """Return '(?:<prefix_str>(<prefix_re>))', or '' if either part is None.

    The result contains exactly one capturing group, around `prefix_re`.
    """
    if None in (prefix_str, prefix_re):
        return ''
    return ''.join(['(?:', prefix_str, '(', prefix_re, '))'])
485
486# Build the regexp that matches an "IR value". This can be a local variable,
487# argument, global, or metadata, anything that is "named". It is important that
488# the PREFIX and SUFFIX below only contain a single group, if that changes
489# other locations will need adjustment as well.
# Group 1 of IR_VALUE_RE: whitespace in front of the value.
IR_VALUE_REGEXP_PREFIX = r'(\s*)'
# Alternation with one alternative (and one capture group) per entry of
# nameless_values, in table order.
IR_VALUE_REGEXP_STRING = r''
for nameless_value in nameless_values:
    lcl_match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    glb_match = createPrefixMatch(nameless_value.global_ir_prefix, nameless_value.global_ir_prefix_regexp)
    # Exactly one of the two forms must be described by each table entry.
    assert((lcl_match or glb_match) and not (lcl_match and glb_match))
    if lcl_match:
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, lcl_match)
    elif glb_match:
        # Definition forms are anchored with '^'; IR_VALUE_RE is compiled
        # without re.M.
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, '^' + glb_match)
# Last group of IR_VALUE_RE: the delimiter after the value (',', whitespace,
# '(' or ')') or the end of the string.
IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)'
IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX)

# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
# (groups 3 and 4 belong to the first and second entry of nameless_values)
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
510
511# Check a match for IR_VALUE_RE and inspect it to determine if it was a local
512# value, %..., global @..., debug number !dbg !..., etc. See the PREFIXES above.
def get_idx_from_ir_value_match(match):
    """Return the nameless_values index of the group that matched.

    The groups between first_nameless_group_in_ir_value_match and
    match.lastindex (exclusive) are inspected; the first one that took part
    in the match decides. If none did, an error is reported and 0 returned.
    """
    candidate_groups = range(first_nameless_group_in_ir_value_match, match.lastindex)
    participating = [group_id for group_id in candidate_groups
                     if match.group(group_id) is not None]
    if participating:
        return participating[0] - first_nameless_group_in_ir_value_match
    error("Unable to identify the kind of IR value from the match!")
    return 0
519
520# See get_idx_from_ir_value_match
def get_name_from_ir_value_match(match):
    """Return the text captured by the nameless-value group that matched."""
    kind_index = get_idx_from_ir_value_match(match)
    return match.group(kind_index + first_nameless_group_in_ir_value_match)
523
# Return the nameless prefix we use for this kind of IR value, see also
# get_idx_from_ir_value_match
def get_nameless_check_prefix_from_ir_value_match(match):
    """Return the check_prefix of the nameless_values entry that matched."""
    value_kind = nameless_values[get_idx_from_ir_value_match(match)]
    return value_kind.check_prefix
528
# Return the IR prefix and check prefix we use for this kind of IR value, e.g., (%, TMP) for locals,
# see also get_idx_from_ir_value_match
def get_ir_prefix_from_ir_value_match(match):
    """Return (IR prefix, check prefix) for the kind of value that matched.

    The entry's ir_prefix is used when it is set and the stripped match
    text starts with it; otherwise the entry's global_ir_prefix is used.
    """
    value_kind = nameless_values[get_idx_from_ir_value_match(match)]
    local_prefix = value_kind.ir_prefix
    if local_prefix and match.group(0).strip().startswith(local_prefix):
        return local_prefix, value_kind.check_prefix
    return value_kind.global_ir_prefix, value_kind.check_prefix
536
def get_check_key_from_ir_value_match(match):
    """Return the check_key of the nameless_values entry that matched."""
    value_kind = nameless_values[get_idx_from_ir_value_match(match)]
    return value_kind.check_key
540
# Return the IR regexp we use for this kind of IR value, e.g., [0-9]+ for attribute uses
# (locals are always checked with '.*'), see also get_idx_from_ir_value_match
def get_ir_prefix_from_ir_value_re_match(match):
    """Return the name regexp (not a prefix) for the kind of value matched.

    Locals always yield '.*'. Otherwise the entry's ir_regexp is returned
    when the stripped match text starts with its ir_prefix, and its
    global_ir_prefix_regexp if not.
    """
    # for backwards compatibility we check locals with '.*'
    if is_local_def_ir_value_match(match):
        return '.*'
    value_kind = nameless_values[get_idx_from_ir_value_match(match)]
    local_prefix = value_kind.ir_prefix
    if local_prefix and match.group(0).strip().startswith(local_prefix):
        return value_kind.ir_regexp
    return value_kind.global_ir_prefix_regexp
551
552# Return true if this kind of IR value is "local", basically if it matches '%{{.*}}'.
def is_local_def_ir_value_match(match):
    """Return True if the matched kind of value has the IR prefix '%'."""
    value_kind = nameless_values[get_idx_from_ir_value_match(match)]
    return value_kind.ir_prefix == '%'
555
556# Return true if this kind of IR value is "global", basically if it matches '#{{.*}}'.
def is_global_scope_ir_value_match(match):
    """Return True if the matched kind of value defines a global_ir_prefix."""
    value_kind = nameless_values[get_idx_from_ir_value_match(match)]
    return not (value_kind.global_ir_prefix is None)
559
560# Return true if var clashes with the scripted FileCheck check_prefix.
def may_clash_with_default_check_prefix_name(check_prefix, var):
  """Return a truthy value if `var` is `check_prefix` followed by digits.

  The comparison ignores case. A falsy `check_prefix` is returned as is;
  otherwise the result of re.match (a match object or None) is returned.
  """
  if not check_prefix:
    return check_prefix
  clash_pattern = r'^' + check_prefix + r'[0-9]+?$'
  return re.match(clash_pattern, var, re.IGNORECASE)
563
564# Create a FileCheck variable name based on an IR name.
def get_value_name(var, check_prefix):
  """Derive the FileCheck variable name for the IR name `var`.

  '!' characters are removed. An all-digit name gets `check_prefix`
  prepended. A name that may clash with scripted names gets the module-level
  _prefix_filecheck_ir_name prepended, if that is set. Finally '.' and '-'
  become '_' and the name is upper-cased.
  """
  name = var.replace('!', '')
  if name.isdigit():
    # This is a nameless value, prepend check_prefix.
    name = check_prefix + name
  elif may_clash_with_default_check_prefix_name(check_prefix, name) and _prefix_filecheck_ir_name:
    # This is a named value that clashes with the check_prefix, prepend with
    # _prefix_filecheck_ir_name, if it has been defined.
    name = _prefix_filecheck_ir_name + name
  return name.replace('.', '_').replace('-', '_').upper()
577  return var.upper()
578
579# Create a FileCheck variable from regex.
def get_value_definition(var, match):
  """Return the FileCheck text that defines a variable for `var`.

  For locals the IR prefix is placed inside the '[[NAME:...]]' pattern; for
  every other kind it is emitted in front of the '[[' instead.
  """
  check_name = get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match))
  ir_prefix = get_ir_prefix_from_ir_value_match(match)[0]
  name_regexp = get_ir_prefix_from_ir_value_re_match(match)
  # for backwards compatibility we check locals with '.*'
  if is_local_def_ir_value_match(match):
    return '[[' + check_name + ':' + ir_prefix + name_regexp + ']]'
  return ir_prefix + '[[' + check_name + ':' + name_regexp + ']]'
586  return prefix + '[[' + get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match)) + ':' + get_ir_prefix_from_ir_value_re_match(match) + ']]'
587
588# Use a FileCheck variable.
def get_value_use(var, match, check_prefix):
  """Return the FileCheck text that uses the variable defined for `var`.

  Locals become a bare '[[NAME]]'; every other kind keeps its IR prefix in
  front of the '[[NAME]]'.
  """
  reference = '[[' + get_value_name(var, check_prefix) + ']]'
  if is_local_def_ir_value_match(match):
    return reference
  return get_ir_prefix_from_ir_value_match(match)[0] + reference
593  return prefix + '[[' + get_value_name(var, check_prefix) + ']]'
594
595# Replace IR value defs and uses with FileCheck variables.
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
  """Replace IR value defs and uses in `lines` with FileCheck variables.

  lines: list of output lines; it is modified in place and also returned.
  is_analyze: when true, comments are still stripped but no values are
    replaced.
  vars_seen: set of (name, check key) pairs already defined for locals;
    updated in place.
  global_vars_seen: dict mapping (name, check key) of non-local values to
    the check prefix they were defined with; updated in place.
  """
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    var = get_name_from_ir_value_match(match)
    for nameless_value in nameless_values:
        if may_clash_with_default_check_prefix_name(nameless_value.check_prefix, var):
          # The option is spelled as it is registered in
          # parse_commandline_args.
          warn("Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
            " with scripted FileCheck name." % (var,))
    key = (var, get_check_key_from_ir_value_match(match))
    is_local_def = is_local_def_ir_value_match(match)
    if is_local_def and key in vars_seen:
      rv = get_value_use(var, match, get_nameless_check_prefix_from_ir_value_match(match))
    elif not is_local_def and key in global_vars_seen:
      rv = get_value_use(var, match, global_vars_seen[key])
    else:
      if is_local_def:
         vars_seen.add(key)
      else:
         global_vars_seen[key] = get_nameless_check_prefix_from_ir_value_match(match)
      rv = get_value_definition(var, match)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + rv + match.group(match.lastindex)

  for i, line in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    line = line.replace('%.', '%dot')
    # For globals selected by --global-hex-value-regex, turn every
    # 'i<bits> <decimal>' literal into a '[[#0x...]]' numeric expression.
    for regex in _global_hex_value_regex:
      if re.match('^@' + regex + ' = ', line):
        line = re.sub(r'\bi([0-9]+) ([0-9]+)',
            lambda m : 'i' + m.group(1) + ' [[#' + hex(int(m.group(2))) + ']]',
            line)
        break
    # Ignore any comments, since the check lines will too.
    scrubbed_line = SCRUB_IR_COMMENT_RE.sub(r'', line)
    lines[i] = scrubbed_line
    if not is_analyze:
      # It can happen that two matches are back-to-back and for some reason sub
      # will not replace both of them. For now we work around this by
      # substituting until there is no more match.
      changed = True
      while changed:
          (lines[i], changed) = IR_VALUE_RE.subn(transform_line_vars, lines[i], count=1)
  return lines
646  return lines
647
648
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze, global_vars_seen_dict):
  """Append the FileCheck lines for func_name to output_lines.

  For every run line in prefix_list, checks are emitted for the first of its
  prefixes that has output for the function and was not emitted already.

  output_lines: list that receives the generated check lines.
  comment_marker: comment leader put in front of every generated line.
  prefix_list: run-line entries; only element 0 of each entry (the list of
    check prefixes of that run line) is read here.
  func_dict: maps check prefix -> {function name -> function info}.  The info
    object is read through .attrs, .args_and_sig and str() (the body).
  check_label_format: %-style format that receives
    (checkprefix, func_name, args_and_sig).
  is_asm: if true the body is emitted verbatim instead of being generalized.
  is_analyze: forwarded unchanged to generalize_check_lines.
  global_vars_seen_dict: per-prefix dict of known global variables; entries
    are created for every visited prefix and, on the non-asm path, extended
    with the globals first seen while emitting this function.
  """
  # A run line can only check the function if every one of its prefixes knows
  # about it.  A subset of the prefixes might have an entry only because a
  # different run line created it, so the whole run line is excluded.
  excluded_prefixes = set()
  for run_info in prefix_list:
    run_prefixes = run_info[0]
    if any(func_name not in func_dict[pfx] for pfx in run_prefixes):
      excluded_prefixes.update(run_prefixes)

  # The exclusions are complete, the output can be emitted now.
  emitted_prefixes = []
  for run_info in prefix_list:
    globals_seen = {}
    for checkprefix in run_info[0]:
      # Start from what is already known for this prefix, creating the
      # per-prefix entry on first use.
      globals_seen.update(global_vars_seen_dict.setdefault(checkprefix, {}))
      if checkprefix in emitted_prefixes:
        break

      if checkprefix in excluded_prefixes:
        continue

      # Nothing to print if there is no output for this prefix.
      func_info = func_dict[checkprefix][func_name]
      if not func_info:
        continue

      # Add some space between different check prefixes, but not after the
      # last check line (before the test code).
      if is_asm and emitted_prefixes:
        output_lines.append(comment_marker)

      # Snapshot of the globals known before this function is processed.
      globals_seen_before = set(globals_seen)
      vars_seen = set()
      emitted_prefixes.append(checkprefix)

      attrs = str(func_info.attrs)
      if attrs and attrs != 'None':
        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))

      args_and_sig = generalize_check_lines(
          [str(func_info.args_and_sig)], is_analyze, vars_seen, globals_seen)[0]
      if '[[' in args_and_sig:
        # The signature contains FileCheck variables: keep the label plain
        # and match the signature on a separate -SAME line.
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))

      body_lines = str(func_info).splitlines()

      if is_asm:
        # For ASM output, just emit the check lines.
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, body_lines[0]))
        for body_line in body_lines[1:]:
          if not body_line.strip():
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, body_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      body_lines = generalize_check_lines(body_lines, is_analyze, vars_seen, globals_seen)

      # Blank lines are not checked; the first line after one is matched with
      # a plain check instead of -NEXT.
      after_blank = False
      for body_line in body_lines:
        if not body_line.strip():
          after_blank = True
          continue
        # Do not waste time checking IR comments.
        body_line = SCRUB_IR_COMMENT_RE.sub(r'', body_line)
        line_format = '{} {}:       {}' if after_blank else '{} {}-NEXT:  {}'
        output_lines.append(line_format.format(comment_marker, checkprefix, body_line))
        after_blank = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)

      # Remember the global variables that were first seen in this function.
      for name, value in globals_seen.items():
        if name not in globals_seen_before:
          global_vars_seen_dict[checkprefix][name] = value
      break
760
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig, global_vars_seen_dict):
  """Emit IR check lines for func_name through add_checks.

  The -LABEL line matches '@<name><signature>'.  When function_sig is true it
  is additionally prefixed with a FileCheck regex that matches the
  'define ...' part in front of the name.  preserve_names is forwarded as the
  is_analyze argument of add_checks, and is_asm is always False.
  """
  # Label format is based on IR string.
  if function_sig:
    define_prefix = 'define {{[^@]+}}'
  else:
    define_prefix = ''
  label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, define_prefix)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             label_format, is_asm=False, is_analyze=preserve_names,
             global_vars_seen_dict=global_vars_seen_dict)
768
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Emit analysis check lines for func_name through add_checks.

  The -LABEL line matches the function name (plus signature) wrapped in single
  quotes.  add_checks is called with is_asm=False, is_analyze=True and a fresh
  dictionary of seen global variables that is discarded afterwards.
  """
  label_format = "{} %s-LABEL: '%s%s'".format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             label_format, is_asm=False, is_analyze=True,
             global_vars_seen_dict={})
774
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
  """Record the global value definitions found in raw_tool_output.

  For every entry of nameless_values that has a global_ir_prefix, all lines of
  raw_tool_output of the form '<lhs> = <rhs>' (built from the entry's regexps)
  are collected and stored in glob_val_dict[prefix][entry.check_prefix] for
  each prefix in prefixes.  Prefixes whose glob_val_dict entry is None are
  left alone.  If a different list was stored before, the entry is reset to
  None, except for the last prefix, where a warning is issued and the new
  lines replace the old ones.
  """
  for value_kind in nameless_values:
    if value_kind.global_ir_prefix is None:
      continue

    definition_re = re.compile(
        r'^' + value_kind.global_ir_prefix + value_kind.global_ir_prefix_regexp
        + r'\s=\s' + value_kind.global_ir_rhs_regexp + r'$',
        flags=re.M)
    found = [m.group(0) for m in definition_re.finditer(raw_tool_output)]

    for prefix in prefixes:
      per_prefix = glob_val_dict[prefix]
      if per_prefix is None:
        continue
      kind_key = value_kind.check_prefix
      if kind_key in per_prefix:
        # Identical output from another run line: nothing to do.
        if found == per_prefix[kind_key]:
          continue
        if prefix != prefixes[-1]:
          # Conflicting output under a shared prefix makes it unusable.
          per_prefix[kind_key] = None
          continue
        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
      per_prefix[kind_key] = found
801
def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines, global_vars_seen_dict, is_analyze, is_before_functions):
  """Append check lines for global value definitions to output_lines.

  Only the entries of nameless_values that have a global_ir_prefix and whose
  is_before_functions flag equals the is_before_functions argument are
  handled.  For each of them and for each run line in prefix_list, the lines
  recorded in glob_val_dict are emitted for the first prefix of the run line
  that yields at least one check line.  When _global_value_regex is non-empty
  a line is only checked if it matches one of these regexes.

  Every emitted group is preceded by a separator line and one final separator
  is appended if anything was emitted.  global_vars_seen_dict is updated in
  place with the global variables first seen while generalizing the lines.
  """
  emitted = set()
  for value_kind in nameless_values:
    if (value_kind.global_ir_prefix is None
        or value_kind.is_before_functions != is_before_functions):
      continue
    for run_info in prefix_list:
      globals_seen = {}
      run_prefixes = run_info[0]
      if run_prefixes is None:
        continue
      for checkprefix in run_prefixes:
        # Start from what is already known for this prefix, creating the
        # per-prefix entry on first use.
        globals_seen.update(global_vars_seen_dict.setdefault(checkprefix, {}))
        emitted_key = (checkprefix, value_kind.check_prefix)
        if emitted_key in emitted:
          break
        values_by_kind = glob_val_dict[checkprefix]
        if not values_by_kind or value_kind.check_prefix not in values_by_kind:
          continue
        global_lines = values_by_kind[value_kind.check_prefix]
        if not global_lines:
          continue

        # Snapshot of the globals known before these lines are processed.
        globals_seen_before = set(globals_seen)
        check_lines = []
        for line in global_lines:
          if _global_value_regex and not any(
              re.match('^@' + regex + ' = ', line)
              for regex in _global_value_regex):
            continue
          generalized = generalize_check_lines([line], is_analyze, set(), globals_seen)
          check_lines.append('%s %s: %s' % (comment_marker, checkprefix, generalized[0]))
        if not check_lines:
          continue

        output_lines.append(comment_marker + SEPARATOR)
        output_lines.extend(check_lines)
        emitted.add(emitted_key)

        # Remember the global variables that were first seen in these lines.
        for name, value in globals_seen.items():
          if name not in globals_seen_before:
            global_vars_seen_dict[checkprefix][name] = value
        break

  if emitted:
    output_lines.append(comment_marker + SEPARATOR)
859
860
def check_prefix(prefix):
  """Warn if prefix does not match PREFIX_RE.

  If the rejected prefix contains a comma, the warning also suggests the
  '--check-prefixes=' spelling.  Nothing is returned.
  """
  if PREFIX_RE.match(prefix):
    return
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  # The hint embeds the raw prefix, so it is appended only after the
  # %-formatting: a '%' inside the prefix must not be read as a conversion
  # specifier.
  if ',' in prefix:
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
868
869
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes given in a FileCheck command line.

  fc_cmd is split on whitespace.  The value of every 'check-prefix=' option
  and each comma-separated value of every 'check-prefixes=' option is passed
  to check_prefix.  In a 'check-prefixes=' list a warning is issued for each
  occurrence of a prefix that appears more than once.
  """
  for token in fc_cmd.split():
    if "check-prefix=" in token:
      check_prefix(token.split('=', 1)[1])
      continue
    if "check-prefixes=" not in token:
      continue
    names = token.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
882
883
def get_autogennote_suffix(parser, args):
  """Return the option summary that is appended to the autogenerated note.

  Every action of parser whose value in args differs from the parser default
  contributes its first option string, followed by its value if the action
  takes a parameter (values of nargs='+' options are individually wrapped in
  double quotes).  The result is ' <UTC_ARGS_KEY> <options>', or the empty
  string if no option qualifies.
  """
  # Parameters such as paths to the binary or the list of tests are ignored.
  ignored_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  pieces = []
  for action in parser._actions:
    dest = action.dest
    # Options such as --help are not included in args.
    if not hasattr(args, dest) or dest in ignored_dests:
      continue
    value = getattr(args, dest)
    takes_parameter = action.const is None
    # An action that stores a constant (usually True/False) only counts if
    # its constant is the stored value; this tells the two halves of a
    # boolean --foo/--no-foo pair apart.
    if not takes_parameter and value != action.const:
      continue
    # Default values are not recorded.
    if parser.get_default(dest) == value:
      continue
    pieces.append(action.option_strings[0])
    if takes_parameter:
      if action.nargs == '+':
        value = ' '.join('"' + v.strip('"') + '"' for v in value)
      pieces.append('%s' % value)
  if not pieces:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(pieces))
909
910
def check_for_command(line, parser, args, argv, argparse_callback):
  """Apply the options of a UTC_ARGS command found in line, if there is one.

  If line matches UTC_ARGS_CMD, the space-separated options of its 'cmd'
  group are appended to argv (in place), argv is re-parsed with parser after
  dropping every argument contained in args.tests, and argparse_callback (if
  not None) is called with the new arguments.

  Returns the tuple (args, argv); args is the unchanged input if line holds
  no command.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if not cmd_m:
    return args, argv
  argv.extend(opt for opt in cmd_m.group('cmd').strip().split(' ') if opt)
  non_test_argv = [arg for arg in argv if arg not in args.tests]
  args = parser.parse_args(non_test_argv)
  if argparse_callback is not None:
    argparse_callback(args)
  return args, argv
921
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
  """Look up an option in the test's arguments.

  test_info: object providing .args and .ro_iterlines(); the latter yields
    objects with .line and .args.
  get_arg_to_check: callable that extracts the option from an args object.
  arg_string: option name, used only in the warning text.
  is_global: if true and the option is not set in test_info.args, the
    per-line arguments are searched as well.

  Returns the first truthy value produced by get_arg_to_check, otherwise the
  last (falsy) value it produced.
  """
  result = get_arg_to_check(test_info.args)
  if result or not is_global:
    return result

  # See if this has been specified via UTC_ARGS.  This is a "global" option
  # that affects the entire generation of test checks.  If it exists anywhere
  # in the test, apply it to everything.
  saw_line = False
  for line_info in test_info.ro_iterlines():
    line = line_info.line
    # Anything that is neither a ';' comment nor blank counts as test input.
    if not line.startswith(';') and line.strip() != '':
      saw_line = True
    result = get_arg_to_check(line_info.args)
    if not result:
      continue
    if saw_line:
      # We saw the option after already reading some test input lines.
      # Warn about it.
      print('WARNING: Found {} in line following test start: '.format(arg_string)
            + line, file=sys.stderr)
      print('WARNING: Consider moving {} to top of file'.format(arg_string),
            file=sys.stderr)
    break
  return result
944
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
  """Copy the input lines of a test to output_lines, dropping old checks.

  The lines come from test_info.iterlines(output_lines).  A line is dropped
  if, after stripping, it is just the comment string or the comment string
  followed by SEPARATOR, or if it starts with the comment string and
  CHECK_RE matches it with a first group contained in prefix_set.  All other
  lines are appended without their trailing newline.
  """
  for input_line_info in test_info.iterlines(output_lines):
    line = input_line_info.line
    stripped = line.strip()
    # Bare comment lines and separators are regenerated with the checks.
    if stripped == comment_string or stripped == comment_string + SEPARATOR:
      continue
    if line.lstrip().startswith(comment_string):
      m = CHECK_RE.match(line)
      if m and m.group(1) in prefix_set:
        continue
    output_lines.append(line.rstrip('\n'))
958
def add_checks_at_end(output_lines, prefix_list, func_order,
                      comment_string, check_generator):
  """Emit the checks of all functions in func_order, grouped by prefix.

  prefix_list: run-line entries whose element 0 is a list of check prefixes
    and whose element 1 is the tool argument string.
  func_order: maps a check prefix to the function names to emit for it.
  comment_string: appended to output_lines before every function except the
    first one.
  check_generator: called as
    check_generator(output_lines, [([prefix], tool_args)], func).
  """
  added = set()
  for run_info in prefix_list:
    run_prefixes, tool_args = run_info[0], run_info[1]
    for checkprefix in run_prefixes:
      for func in func_order[checkprefix]:
        # Separate this function's checks from the ones emitted before it.
        if added:
          output_lines.append(comment_string)
        added.add(func)

        # The add_*_checks routines expect a run list whose items are
        # tuples that have a list of prefixes as their first element and
        # tool command args string as their second element.  They output
        # checks for each prefix in the list of prefixes.  By doing so, they
        # implicitly assume that for each function every run line will
        # generate something for that function.  That is not the case for
        # generated functions as some run lines might not generate them
        # (e.g. -fopenmp vs. no -fopenmp).
        #
        # Therefore, pass just the prefix we're interested in.  This has
        # the effect of generating all of the checks for functions of a
        # single prefix before moving on to the next prefix.  So checks
        # are ordered by prefix instead of by function as in "normal"
        # mode.
        single_prefix_run = [([checkprefix], tool_args)]
        check_generator(output_lines, single_prefix_run, func)
988