xref: /llvm-project/llvm/utils/UpdateTestChecks/common.py (revision de37912f000e7e8e42fbf16a832dd4020f85c06f)
1from __future__ import print_function
2
3import copy
4import glob
5import os
6import re
7import subprocess
8import sys
9
10##### Common utilities for update_*test_checks.py
11
12
# Module-level settings shared by the helpers in this file. They are
# overwritten from the parsed command line by parse_commandline_args (and, for
# _prefix_filecheck_ir_name, by TestInfo).
_verbose = False
_prefix_filecheck_ir_name = ''
# Give the regex lists a defined (empty) value up front so that code reading
# them, e.g. generalize_check_lines, does not raise NameError when it runs
# before parse_commandline_args has been called.
_global_value_regex = []
_global_hex_value_regex = []
15
# Register the options common to the update_*test_checks.py scripts on
# `parser`, parse the command line with it, and return the parsed namespace.
# Side effect: the module-level _verbose, _global_value_regex and
# _global_hex_value_regex settings are set from the parsed values.
def parse_commandline_args(parser):
  global _verbose, _global_value_regex, _global_hex_value_regex
  add_option = parser.add_argument

  # Boolean switches.
  add_option('--include-generated-funcs', action='store_true',
             help='Output checks for functions not in source')
  add_option('-v', '--verbose', action='store_true',
             help='Show verbose output')
  add_option('-u', '--update-only', action='store_true',
             help='Only update test if it was already autogened')
  add_option('--force-update', action='store_true',
             help='Update test even if it was autogened by a different script')
  # --enable/--disable write to the same destination; it defaults to True.
  add_option('--enable', action='store_true', dest='enabled', default=True,
             help='Activate CHECK line generation from this point forward')
  add_option('--disable', action='store_false', dest='enabled',
             help='Deactivate CHECK line generation from this point forward')

  # Options that take values.
  add_option('--replace-value-regex', nargs='+', default=[],
             help='List of regular expressions to replace matching value names')
  add_option('--prefix-filecheck-ir-name', default='',
             help='Add a prefix to FileCheck IR value names to avoid conflicts with scripted names')
  add_option('--global-value-regex', nargs='+', default=[],
             help='List of regular expressions that a global value declaration must match to generate a check (has no effect if checking globals is not enabled)')
  add_option('--global-hex-value-regex', nargs='+', default=[],
             help='List of regular expressions such that, for matching global value declarations, literal integer values should be encoded in hex in the associated FileCheck directives')

  parsed = parser.parse_args()
  _verbose = parsed.verbose
  _global_value_regex = parsed.global_value_regex
  _global_hex_value_regex = parsed.global_hex_value_regex
  return parsed
43
44
class InputLineInfo(object):
  # Plain record for one input line: the line text, its line number, and the
  # args/argv values associated with it by the caller.
  def __init__(self, line, line_number, args, argv):
    self.line, self.line_number = line, line_number
    self.args, self.argv = args, argv
51
52
class TestInfo(object):
  # Holds everything known about one test file: its path, its lines, the RUN
  # lines found in it, the parser/args/argv in effect, the comment prefix and
  # the autogeneration note that is written as the first output line.
  def __init__(self, test, parser, script_name, input_lines, args, argv,
               comment_prefix, argparse_callback):
    global _prefix_filecheck_ir_name
    self.parser = parser
    self.argparse_callback = argparse_callback
    self.path = test
    self.args = args
    # Only a non-empty prefix overrides the module-level setting.
    if args.prefix_filecheck_ir_name:
      _prefix_filecheck_ir_name = args.prefix_filecheck_ir_name
    self.argv = argv
    self.input_lines = input_lines
    self.run_lines = find_run_lines(test, self.input_lines)
    # Without an explicit comment prefix, .mir files use '#', all others ';'.
    if comment_prefix is None:
      comment_prefix = '#' if self.path.endswith('.mir') else ';'
    self.comment_prefix = comment_prefix
    self.autogenerated_note_prefix = self.comment_prefix + ' ' + UTC_ADVERT
    self.test_autogenerated_note = (self.autogenerated_note_prefix + script_name +
                                    get_autogennote_suffix(parser, self.args))

  # Yield an InputLineInfo for every input line without touching any output.
  # Each line is passed through check_for_command together with the current
  # self.args/self.argv; the values it returns are attached to the line.
  def ro_iterlines(self):
    for index, text in enumerate(self.input_lines):
      line_args, line_argv = check_for_command(text, self.parser,
                                               self.args, self.argv,
                                               self.argparse_callback)
      yield InputLineInfo(text, index, line_args, line_argv)

  # Append the autogeneration note to output_lines, then yield the lines that
  # still need processing. Lines starting with the note prefix are dropped;
  # lines seen while generation is disabled are copied to output_lines as-is.
  def iterlines(self, output_lines):
    output_lines.append(self.test_autogenerated_note)
    for info in self.ro_iterlines():
      text = info.line
      # Discard any previous script advertising.
      if text.startswith(self.autogenerated_note_prefix):
        continue
      # Adopt the per-line options before deciding what to do with the line.
      self.args, self.argv = info.args, info.argv
      if self.args.enabled:
        yield info
      else:
        output_lines.append(text)
95
# Generator over the tests matching test_patterns. Each pattern is expanded
# with glob; for every matching file the lines are read (right-stripped), the
# command line is re-parsed, and a TestInfo is yielded unless the file is
# skipped (autogenerated by another script without --force-update, or not
# autogenerated while --update-only is set).
def itertests(test_patterns, parser, script_name, comment_prefix=None, argparse_callback=None):
  for pattern in test_patterns:
    # On Windows we must expand the patterns ourselves.
    matching_tests = glob.glob(pattern)
    if not matching_tests:
      warn("Test file pattern '%s' was not found. Ignoring it." % (pattern,))
      continue
    for test in matching_tests:
      with open(test) as test_file:
        input_lines = [text.rstrip() for text in test_file]
      args = parser.parse_args()
      if argparse_callback is not None:
        argparse_callback(args)
      argv = sys.argv[:]
      first_line = input_lines[0] if input_lines else ""
      if UTC_ADVERT in first_line:
        made_by_other_script = script_name not in first_line
        if made_by_other_script and not args.force_update:
          warn("Skipping test which wasn't autogenerated by " + script_name, test)
          continue
        # The note line may carry extra options for this test.
        args, argv = check_for_command(first_line, parser, args, argv, argparse_callback)
      elif args.update_only:
        assert UTC_ADVERT not in first_line
        warn("Skipping test which isn't autogenerated: " + test)
        continue
      yield TestInfo(test, parser, script_name, input_lines, args, argv,
                     comment_prefix, argparse_callback)
122
123
# Decide whether input_line should be copied to the regenerated test.
#
# Returns False for:
#   - a line consisting only of the comment marker (unless skip_global_checks),
#   - the separator line (comment marker immediately followed by SEPARATOR),
#   - a check line whose prefix is in prefix_set; with skip_global_checks set
#     this only applies to check lines containing '[['.
# Returns True for everything else. Blank lines are not filtered here.
def should_add_line_to_output(input_line, prefix_set, skip_global_checks = False, comment_marker = ';'):
  stripped = input_line.strip()
  # Skip any blank comment lines in the IR.
  if not skip_global_checks and stripped == comment_marker:
    return False
  # Skip a special double comment line we use as a separator.
  if stripped == comment_marker + SEPARATOR:
    return False
  # And skip any CHECK lines. We're building our own.
  m = CHECK_RE.match(input_line)
  if m and m.group(1) in prefix_set:
    if skip_global_checks:
      # A plain substring test replaces the former per-call regex compile of
      # '\[\[' (which also relied on an invalid string escape).
      return '[[' not in input_line
    return False

  return True
143
# Build the list of lit-like (pattern, replacement) substitutions for a test:
# %s is the test path, %S and %p its directory, %{pathsep} is os.pathsep.
def getSubstitutions(sourcepath):
  containing_dir = os.path.dirname(sourcepath)
  table = [('%s', sourcepath)]
  table += [(pattern, containing_dir) for pattern in ('%S', '%p')]
  table.append(('%{pathsep}', os.pathsep))
  return table
151
# Apply every (pattern, replacement) pair to s, in list order, and return the
# result. Later pairs see the output of earlier ones.
def applySubstitutions(s, substitutions):
  expanded = s
  for pattern, replacement in substitutions:
    expanded = expanded.replace(pattern, replacement)
  return expanded
156
# Invoke the tool that is being tested and return its standard output as text
# with '\r\n' line endings converted to '\n'.
#
#   exe            - the tool to run.
#   cmd_args       - either a list of arguments (run without a shell) or a
#                    single string (appended to exe and run through the shell).
#   ir             - path of the test file; it is fed to the tool on stdin and
#                    used for the lit-like substitutions.
#   preprocess_cmd - optional shell command whose stdout replaces the test
#                    file as the tool's stdin.
#   verbose        - print the preprocessing command to stderr.
#
# Raises subprocess.CalledProcessError (from check_output) when the tool exits
# with a non-zero status.
def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
  pp = None
  try:
    with open(ir) as ir_file:
      substitutions = getSubstitutions(ir)

      # TODO Remove the str form which is used by update_test_checks.py and
      # update_llc_test_checks.py
      # The safer list form is used by update_cc_test_checks.py
      if preprocess_cmd:
        # Allow pre-processing the IR file (e.g. using sed):
        assert isinstance(preprocess_cmd, str)  # TODO: use a list instead of using shell
        preprocess_cmd = applySubstitutions(preprocess_cmd, substitutions).strip()
        if verbose:
          print('Pre-processing input file: ', ir, " with command '",
                preprocess_cmd, "'", sep="", file=sys.stderr)
        # Python 2.7 doesn't have subprocess.DEVNULL:
        with open(os.devnull, 'w') as devnull:
          pp = subprocess.Popen(preprocess_cmd, shell=True, stdin=devnull,
                                stdout=subprocess.PIPE)
          ir_file = pp.stdout

      if isinstance(cmd_args, list):
        args = [applySubstitutions(a, substitutions) for a in cmd_args]
        stdout = subprocess.check_output([exe] + args, stdin=ir_file)
      else:
        stdout = subprocess.check_output(exe + ' ' + applySubstitutions(cmd_args, substitutions),
                                         shell=True, stdin=ir_file)
      if sys.version_info[0] > 2:
        # FYI, if you crashed here with a decode error, your run line probably
        # results in bitcode or other binary format being written to the pipe.
        # For an opt test, you probably want to add -S or -disable-output.
        stdout = stdout.decode()
  finally:
    # Release the preprocessor: close our end of its pipe and reap the child
    # so that neither the descriptor nor the process is leaked, also when the
    # tool invocation above failed.
    if pp is not None:
      pp.stdout.close()
      pp.wait()
  # Fix line endings to Unix LF style.
  return stdout.replace('\r\n', '\n')
191
##### LLVM IR parser
# A RUN line: optional leading whitespace, a comment marker ('//', ';' or '#'),
# 'RUN:', and then the command, which is captured as group 1.
RUN_LINE_RE = re.compile(r'^\s*(?://|[;#])\s*RUN:\s*(.*)$')
# A -check-prefix / --check-prefixes style option followed by '=' or a space;
# group 1 is the non-whitespace value that follows.
CHECK_PREFIX_RE = re.compile(r'--?check-prefix(?:es)?[= ](\S+)')
# A string made up solely of ASCII letters, digits, '_' and '-'.
PREFIX_RE = re.compile('^[a-zA-Z0-9_-]+$')
# A check line: comment marker, then a prefix (group 1), an optional
# -NEXT/-NOT/-DAG/-LABEL/-SAME/-EMPTY suffix, and a ':'.
CHECK_RE = re.compile(r'^\s*(?://|[;#])\s*([^:]+?)(?:-NEXT|-NOT|-DAG|-LABEL|-SAME|-EMPTY)?:')
197
# Key that precedes script options embedded in a line of a test file.
UTC_ARGS_KEY = 'UTC_ARGS:'
# Captures, as group 'cmd', the text that follows UTC_ARGS_KEY on a line.
# Every fragment is a raw string so that '\s' reaches the regex engine as
# written instead of relying on an invalid string escape.
UTC_ARGS_CMD = re.compile(r'.*' + UTC_ARGS_KEY + r'\s*(?P<cmd>.*)\s*$')
# Text that (after the comment prefix) starts the autogeneration note line.
UTC_ADVERT = 'NOTE: Assertions have been autogenerated by '
201
# A function definition in textual IR output. Named groups:
#   attrs        - text of an optional preceding '; Function Attrs:' comment,
#   func         - the function name following '@',
#   args_and_sig - from the opening '(' of the argument list through the '{',
#   body         - everything up to (not including) a line consisting of '}'.
OPT_FUNCTION_RE = re.compile(
    r'^(\s*;\s*Function\sAttrs:\s(?P<attrs>[\w\s]+?))?\s*define\s+(?:internal\s+)?[^@]*@(?P<func>[\w.$-]+?)\s*'
    r'(?P<args_and_sig>\((\)|(.*?[\w.-]+?)\))[^{]*\{)\n(?P<body>.*?)^\}$',
    flags=(re.M | re.S))

# Output of the form "'<analysis>' for function '<func>':" followed by a body.
# Named groups: analysis, func, body (the rest of the text after the header).
ANALYZE_FUNCTION_RE = re.compile(
    r'^\s*\'(?P<analysis>[\w\s-]+?)\'\s+for\s+function\s+\'(?P<func>[\w.$-]+?)\':'
    r'\s*\n(?P<body>.*)$',
    flags=(re.X | re.S))

# The start of a 'define' line; group 1 is the (optionally quoted) name.
IR_FUNCTION_RE = re.compile(r'^\s*define\s+(?:internal\s+)?[^@]*@"?([\w.$-]+)"?\s*\(')
# A 'target triple = "..."' line; group 1 is the quoted triple.
TRIPLE_IR_RE = re.compile(r'^\s*target\s+triple\s*=\s*"([^"]+)"$')
# -mtriple / -march options followed by '=' or a space; group 1 is the value.
TRIPLE_ARG_RE = re.compile(r'-mtriple[= ]([^ ]+)')
MARCH_ARG_RE = re.compile(r'-march[= ]([^ ]+)')
216
# Whitespace at the very start of the string (captured as group 1).
SCRUB_LEADING_WHITESPACE_RE = re.compile(r'^(\s+)')
# A run of spaces/tabs, except where the lookahead rules it out: at the start
# of a line, or where two spaces followed by a word character start the line.
SCRUB_WHITESPACE_RE = re.compile(r'(?!^(|  \w))[ \t]+', flags=re.M)
# Spaces/tabs at the end of any line.
SCRUB_TRAILING_WHITESPACE_RE = re.compile(r'[ \t]+$', flags=re.M)
# Alias of the pattern above; this is the name scrub_body uses.
SCRUB_TRAILING_WHITESPACE_TEST_RE = SCRUB_TRAILING_WHITESPACE_RE
# Like the trailing-whitespace pattern, but '#<digits>' tokens at the end of a
# line are consumed as well.
SCRUB_TRAILING_WHITESPACE_AND_ATTRIBUTES_RE = re.compile(r'([ \t]|(#[0-9]+))+$', flags=re.M)
# A '# kill:' comment line including its newline. Compiled without re.M, so
# '^' only anchors at the start of the searched string.
SCRUB_KILL_COMMENT_RE = re.compile(r'^ *#+ +kill:.*\n')
# The '# =>This Inner Loop Header:' and '# in Loop:' comments, to end of line.
SCRUB_LOOP_COMMENT_RE = re.compile(
    r'# =>This Inner Loop Header:.*|# in Loop:.*', flags=re.M)
# A '#' left at the end of a line (with any spaces/tabs before it) that follows
# a non-whitespace character.
SCRUB_TAILING_COMMENT_TOKEN_RE = re.compile(r'(?<=\S)+[ \t]*#$', flags=re.M)

# Appended to the comment marker to form the separator line that
# should_add_line_to_output skips.
SEPARATOR = '.'
228
# Print 'ERROR: <msg>' to stderr; a non-empty test_file is appended to the
# message after ': '.
def error(msg, test_file=None):
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('ERROR: {}'.format(text), file=sys.stderr)
233
# Print 'WARNING: <msg>' to stderr; a non-empty test_file is appended to the
# message after ': '.
def warn(msg, test_file=None):
  text = '{}: {}'.format(msg, test_file) if test_file else msg
  print('WARNING: {}'.format(text), file=sys.stderr)
238
# print() wrapper that only produces output when the module-level _verbose
# flag is set. Output goes to stderr unless the caller passes file=...
def debug(*args, **kwargs):
  # Python2 does not allow def debug(*args, file=sys.stderr, **kwargs):
  kwargs.setdefault('file', sys.stderr)
  if not _verbose:
    return
  print(*args, **kwargs)
245
# Collect the RUN lines of a test. `lines` are the lines of the file `test`
# (the path is only used in debug output). A RUN line ending in a backslash is
# joined with the next RUN line, separated by a single space.
def find_run_lines(test, lines):
  debug('Scanning for RUN lines in test file:', test)
  run_lines = []
  for text in lines:
    m = RUN_LINE_RE.match(text)
    if not m:
      continue
    command = m.group(1)
    if run_lines and run_lines[-1].endswith('\\'):
      # Continuation: drop the trailing backslash(es) and glue the pieces.
      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + command
    else:
      run_lines.append(command)
  debug('Found {} RUN lines in {}:'.format(len(run_lines), test))
  for command in run_lines:
    debug('  RUN: {}'.format(command))
  return run_lines
260
# Normalize the whitespace of a function body in three steps:
#   1. replace each match of SCRUB_WHITESPACE_RE with one space (the pattern
#      leaves leading whitespace in place),
#   2. expand tabs to a tab size of 2,
#   3. strip trailing whitespace from every line.
def scrub_body(body):
  collapsed = SCRUB_WHITESPACE_RE.sub(r' ', body)
  untabbed = collapsed.expandtabs(2)
  return SCRUB_TRAILING_WHITESPACE_TEST_RE.sub(r'', untabbed)
270
# Call scrubber(body, *scrubber_args). When scrubber_args is non-empty the call
# is made on a deep copy whose first element has extra_scrub set to `extra`,
# so the caller's argument objects are never modified.
def do_scrub(body, scrubber, scrubber_args, extra):
  if not scrubber_args:
    return scrubber(body, *scrubber_args)
  args_copy = copy.deepcopy(scrubber_args)
  args_copy[0].extra_scrub = extra
  return scrubber(body, *args_copy)
277
# Value stored per function in the dictionary of function bodies: the scrubbed
# body text (also what str() returns), the extra-scrubbed text, the argument
# list/signature text and the function attributes.
class function_body(object):
  def __init__(self, string, extra, args_and_sig, attrs):
    self.scrub = string
    self.extrascrub = extra
    self.args_and_sig = args_and_sig
    self.attrs = attrs

  # Return True when this body and the given one differ at most in the names
  # of their arguments: the attributes must be equal, the signatures must be
  # equal once argument names are dropped, and the extra-scrubbed bodies must
  # be equal once uses of those names (and IR comments) are removed. For asm
  # (is_asm) the extra-scrubbed bodies are compared verbatim instead.
  def is_same_except_arg_names(self, extrascrub, args_and_sig, attrs, is_asm):
    if self.attrs != attrs:
      return False

    # Names collected from both signatures by drop_arg_names.
    arg_names = set()

    def drop_arg_names(match):
      arg_names.add(match.group(variable_group_in_ir_value_match))
      attr = match.group(attribute_group_in_ir_value_match) or ''
      return match.group(1) + attr + match.group(match.lastindex)

    def repl_arg_names(match):
      name = match.group(variable_group_in_ir_value_match)
      if name is not None and name in arg_names:
        kept = ''
      else:
        kept = match.group(2)
      return match.group(1) + kept + match.group(match.lastindex)

    own_sig = IR_VALUE_RE.sub(drop_arg_names, self.args_and_sig)
    other_sig = IR_VALUE_RE.sub(drop_arg_names, args_and_sig)
    if own_sig != other_sig:
      return False

    if is_asm:
      # Check without replacements, the replacements are not applied to the
      # body for asm checks.
      return self.extrascrub == extrascrub

    own_body, other_body = [
        SCRUB_IR_COMMENT_RE.sub(r'', IR_VALUE_RE.sub(repl_arg_names, text))
        for text in (self.extrascrub, extrascrub)]
    return own_body == other_body

  def __str__(self):
    return self.scrub
317
class FunctionTestBuilder:
  # Accumulates, per check prefix, what the RUN lines of one test produced:
  #   _func_dict[prefix][func]  - a function_body, or None once RUN lines
  #                               sharing the prefix produced conflicting
  #                               output for func,
  #   _func_order[prefix]       - function names, appended each time a body is
  #                               stored for the prefix,
  #   _global_var_dict[prefix]  - dictionary handed to
  #                               build_global_values_dictionary.

  # Matches one parenthesised group of a --replace-value-regex pattern, in
  # leftmost order. Compiled once here, as a raw string, instead of inside the
  # innermost loop from a literal with invalid '\(' escapes.
  _CAPTURE_GROUP_RE = re.compile(r'\(.*?\)')

  # run_list: sequence whose elements have the list of check prefixes at
  #           index 0.
  # flags:    object providing verbose, function_signature, check_attributes
  #           and replace_value_regex.
  # scrubber_args: extra arguments forwarded to the scrubber via do_scrub.
  # path:     test path, used in warnings only.
  def __init__(self, run_list, flags, scrubber_args, path):
    self._verbose = flags.verbose
    self._record_args = flags.function_signature
    self._check_attributes = flags.check_attributes
    self._scrubber_args = scrubber_args
    self._path = path
    # Strip double-quotes if input was read by UTC_ARGS
    self._replace_value_regex = [regex.strip('"') for regex in flags.replace_value_regex]
    self._func_dict = {}
    self._func_order = {}
    self._global_var_dict = {}
    for run in run_list:
      for prefix in run[0]:
        self._func_dict[prefix] = {}
        self._func_order[prefix] = []
        self._global_var_dict[prefix] = {}

  # Warn about every prefix for which all recorded functions conflicted, then
  # return the prefix -> {function name -> function_body or None} dictionary.
  def finish_and_get_func_dict(self):
    for prefix in self._get_failed_prefixes():
      warn('Prefix %s had conflicting output from different RUN lines for all functions in test %s' % (prefix,self._path,))
    return self._func_dict

  def func_order(self):
    return self._func_order

  def global_var_dict(self):
    return self._global_var_dict

  # Record the functions found in the output of one RUN line.
  #   function_re     - compiled regex with named groups 'func' and 'body'
  #                     ('attrs' is read when attribute checking is enabled;
  #                     'args_and_sig' and 'analysis' are used if present).
  #   scrubber        - callable applied to each body through do_scrub.
  #   raw_tool_output - the complete tool output for this RUN line.
  #   prefixes        - the check prefixes of this RUN line.
  #   is_asm          - forwarded to function_body.is_same_except_arg_names.
  def process_run_line(self, function_re, scrubber, raw_tool_output, prefixes, is_asm):
    build_global_values_dictionary(self._global_var_dict, raw_tool_output, prefixes)
    # finditer only yields successful matches, so no per-match check is needed.
    for m in function_re.finditer(raw_tool_output):
      func = m.group('func')
      body = m.group('body')
      attrs = m.group('attrs') if self._check_attributes else ''
      # Determine if we print arguments, the opening brace, or nothing after the
      # function name
      has_args_and_sig = 'args_and_sig' in m.groupdict()
      if self._record_args and has_args_and_sig:
        args_and_sig = scrub_body(m.group('args_and_sig').strip())
      elif has_args_and_sig:
        args_and_sig = '('
      else:
        args_and_sig = ''
      scrubbed_body = do_scrub(body, scrubber, self._scrubber_args,
                               extra=False)
      scrubbed_extra = do_scrub(body, scrubber, self._scrubber_args,
                                extra=True)
      if 'analysis' in m.groupdict():
        analysis = m.group('analysis')
        if analysis.lower() != 'cost model analysis':
          warn('Unsupported analysis mode: %r!' % (analysis,))
      if func.startswith('stress'):
        # We only use the last line of the function body for stress tests.
        scrubbed_body = '\n'.join(scrubbed_body.splitlines()[-1:])
      if self._verbose:
        print('Processing function: ' + func, file=sys.stderr)
        for l in scrubbed_body.splitlines():
          print('  ' + l, file=sys.stderr)
      for prefix in prefixes:
        # Replace function names matching the regex.
        for regex in self._replace_value_regex:
          # Replace function name with regex.
          match = re.match(regex, func)
          if match:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = self._CAPTURE_GROUP_RE.sub(re.escape(g), func_repl, count=1)
            func = re.sub(func_repl, '{{' + func_repl + '}}', func)

          # Replace all calls to regex matching functions.
          matches = re.finditer(regex, scrubbed_body)
          for match in matches:
            func_repl = regex
            # Replace any capture groups with their matched strings.
            for g in match.groups():
              func_repl = self._CAPTURE_GROUP_RE.sub(re.escape(g), func_repl, count=1)
            # Substitute function call names that match the regex with the same
            # capture groups set.
            scrubbed_body = re.sub(func_repl, '{{' + func_repl + '}}',
                                   scrubbed_body)

        if func in self._func_dict[prefix]:
          existing = self._func_dict[prefix][func]
          if (existing is None or
              str(existing) != scrubbed_body or
              existing.args_and_sig != args_and_sig or
              existing.attrs != attrs):
            if (existing is not None and
                existing.is_same_except_arg_names(
                    scrubbed_extra,
                    args_and_sig,
                    attrs,
                    is_asm)):
              # Only argument names differ: keep the entry, switching it to
              # the extra-scrubbed text and the current signature.
              existing.scrub = scrubbed_extra
              existing.args_and_sig = args_and_sig
              continue
            else:
              # This means a previous RUN line produced a body for this function
              # that is different from the one produced by this current RUN line,
              # so the body can't be common across RUN lines. We use None to
              # indicate that.
              self._func_dict[prefix][func] = None
              continue

        self._func_dict[prefix][func] = function_body(
            scrubbed_body, scrubbed_extra, args_and_sig, attrs)
        self._func_order[prefix].append(func)

  def _get_failed_prefixes(self):
    # This returns the list of those prefixes that failed to match any function,
    # because there were conflicting bodies produced by different RUN lines, in
    # all instances of the prefix. Effectively, this prefix is unused and should
    # be removed.
    for prefix in self._func_dict:
      funcs = self._func_dict[prefix]
      # Prefixes without any recorded function are not reported.
      if funcs and all(body is None for body in funcs.values()):
        yield prefix
440
441
442##### Generator of LLVM IR CHECK lines
443
# An IR comment: any whitespace before a ';' plus everything after it up to
# the end of the line.
SCRUB_IR_COMMENT_RE = re.compile(r'\s*;.*')
445
446# TODO: We should also derive check lines for global, debug, loop declarations, etc..
447
class NamelessValue:
    # Describes one kind of IR value that can be turned into a FileCheck
    # variable. The constructor stores its arguments unchanged.
    def __init__(self, check_prefix, check_key, ir_prefix, global_ir_prefix, global_ir_prefix_regexp,
                 ir_regexp, global_ir_rhs_regexp, is_before_functions):
        # How the value is named and keyed on the FileCheck side.
        self.check_prefix, self.check_key = check_prefix, check_key
        # Prefix and name regexp for a use inside a line.
        self.ir_prefix, self.ir_regexp = ir_prefix, ir_regexp
        # Prefix, name regexp and right-hand-side regexp for a global entry.
        self.global_ir_prefix = global_ir_prefix
        self.global_ir_prefix_regexp = global_ir_prefix_regexp
        self.global_ir_rhs_regexp = global_ir_rhs_regexp
        self.is_before_functions = is_before_functions
459
# Description of the different "unnamed" values we match in the IR, e.g.,
# (local) ssa values, (debug) metadata, etc.
#
# Columns, in NamelessValue constructor order:
#   check_prefix, check_key, ir_prefix, global_ir_prefix,
#   global_ir_prefix_regexp, ir_regexp, global_ir_rhs_regexp,
#   is_before_functions
# Each entry sets either the (ir_prefix, ir_regexp) pair or the
# (global_ir_prefix, global_ir_prefix_regexp) pair, never both; the code that
# builds IR_VALUE_RE asserts this. The list order matters: an entry's index
# determines its capture group in IR_VALUE_RE.
nameless_values = [
    NamelessValue(r'TMP'  , '%' , r'%'           , None            , None                   , r'[\w$.-]+?' , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , r'#'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'ATTR' , '#' , None           , r'attributes #' , r'[0-9]+'              , None         , r'{[^}]*}'           , False) ,
    NamelessValue(r'GLOB' , '@' , r'@'           , None            , None                   , r'[0-9]+'    , None                 , False) ,
    NamelessValue(r'GLOB' , '@' , None           , r'@'            , r'[a-zA-Z0-9_$"\\.-]+' , None         , r'.+'                , True)  ,
    NamelessValue(r'DBG'  , '!' , r'!dbg '       , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'PROF' , '!' , r'!prof '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'TBAA' , '!' , r'!tbaa '      , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'RNG'  , '!' , r'!range '     , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'LOOP' , '!' , r'!llvm.loop ' , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , r'metadata '   , None            , None                   , r'![0-9]+'   , None                 , False) ,
    NamelessValue(r'META' , '!' , None           , r''             , r'![0-9]+'             , None         , r'(?:distinct |)!.*' , False) ,
]
476
# Combine two regexp fragments into the alternation 'old|new'. If one of them
# is empty (falsy) the other one is returned unchanged.
def createOrRegexp(old, new):
    if old and new:
        return old + '|' + new
    return new if not old else old
483
# Build the regexp fragment '(?:<prefix_str>(<prefix_re>))': a non-capturing
# group containing the prefix followed by one capturing group for prefix_re.
# Returns '' when either argument is None.
def createPrefixMatch(prefix_str, prefix_re):
    if prefix_str is not None and prefix_re is not None:
        return ''.join(['(?:', prefix_str, '(', prefix_re, '))'])
    return ''
488
# Build the regexp that matches an "IR value". This can be a local variable,
# argument, global, or metadata, anything that is "named". It is important that
# the PREFIX and SUFFIX below only contain a single group, if that changes
# other locations will need adjustment as well.
# Group 1: whitespace in front of the value.
IR_VALUE_REGEXP_PREFIX = r'(\s*)'
# Alternation with one capturing group per nameless_values entry, in list order.
IR_VALUE_REGEXP_STRING = r''
for nameless_value in nameless_values:
    lcl_match = createPrefixMatch(nameless_value.ir_prefix, nameless_value.ir_regexp)
    glb_match = createPrefixMatch(nameless_value.global_ir_prefix, nameless_value.global_ir_prefix_regexp)
    # Exactly one of the two forms must be described by each entry.
    assert((lcl_match or glb_match) and not (lcl_match and glb_match))
    if lcl_match:
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, lcl_match)
    elif glb_match:
        # Global forms are anchored with '^'.
        IR_VALUE_REGEXP_STRING = createOrRegexp(IR_VALUE_REGEXP_STRING, '^' + glb_match)
# Last group: the character after the value (',', whitespace, '(' or ')'), or
# empty at the end of the string.
IR_VALUE_REGEXP_SUFFIX = r'([,\s\(\)]|\Z)'
IR_VALUE_RE = re.compile(IR_VALUE_REGEXP_PREFIX + r'(' + IR_VALUE_REGEXP_STRING + r')' + IR_VALUE_REGEXP_SUFFIX)
505
# The entire match is group 0, the prefix has one group (=1), the entire
# IR_VALUE_REGEXP_STRING is one group (=2), and then the nameless values start.
first_nameless_group_in_ir_value_match = 3

# constants for the group id of special matches
# Group 3 belongs to the first nameless_values entry (TMP, prefix '%') and
# group 4 to the second one (ATTR, prefix '#').
variable_group_in_ir_value_match = 3
attribute_group_in_ir_value_match = 4
513
# Given a match of IR_VALUE_RE, return the index into nameless_values of the
# kind of value that matched (local %..., global @..., !dbg !..., etc.). This
# is the position of the first per-kind capture group that took part in the
# match. If none did, an error is reported and 0 is returned.
def get_idx_from_ir_value_match(match):
    first_group = first_nameless_group_in_ir_value_match
    # match.lastindex is the suffix group, so it is excluded from the scan.
    for group_no in range(first_group, match.lastindex):
        if match.group(group_no) is not None:
            return group_no - first_group
    error("Unable to identify the kind of IR value from the match!")
    return 0
522
# Return the text captured by the per-kind group that matched, i.e. the name
# of the IR value without its prefix. See get_idx_from_ir_value_match.
def get_name_from_ir_value_match(match):
    kind_index = get_idx_from_ir_value_match(match)
    return match.group(kind_index + first_nameless_group_in_ir_value_match)
526
# Return the nameless prefix (check_prefix) we use for this kind of IR value,
# see also get_idx_from_ir_value_match.
def get_nameless_check_prefix_from_ir_value_match(match):
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    return kind.check_prefix
531
# Return the pair (IR prefix, check prefix) for this kind of IR value, e.g.,
# (%, TMP) for locals. The entry's ir_prefix is used when it is set and the
# stripped match text starts with it; otherwise its global_ir_prefix is used.
# See also get_idx_from_ir_value_match.
def get_ir_prefix_from_ir_value_match(match):
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    uses_ir_prefix = kind.ir_prefix and match.group(0).strip().startswith(kind.ir_prefix)
    chosen_prefix = kind.ir_prefix if uses_ir_prefix else kind.global_ir_prefix
    return chosen_prefix, kind.check_prefix
539
# Return the check_key of the nameless_values entry this match belongs to.
def get_check_key_from_ir_value_match(match):
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    return kind.check_key
543
# Return the regexp used for the name of this kind of IR value: '.*' for
# locals, otherwise the entry's ir_regexp when the stripped match text starts
# with its ir_prefix, and its global_ir_prefix_regexp in all other cases.
# See also get_idx_from_ir_value_match.
def get_ir_prefix_from_ir_value_re_match(match):
    # for backwards compatibility we check locals with '.*'
    if is_local_def_ir_value_match(match):
        return '.*'
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    matched_text = match.group(0).strip()
    if kind.ir_prefix and matched_text.startswith(kind.ir_prefix):
        return kind.ir_regexp
    return kind.global_ir_prefix_regexp
554
# Return true if this kind of IR value is "local", i.e. the ir_prefix of its
# nameless_values entry is '%'.
def is_local_def_ir_value_match(match):
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    return kind.ir_prefix == '%'
558
# Return true if this kind of IR value is "global", i.e. its nameless_values
# entry defines a global_ir_prefix.
def is_global_scope_ir_value_match(match):
    kind = nameless_values[get_idx_from_ir_value_match(match)]
    return kind.global_ir_prefix is not None
562
# Return a truthy value (the regex match) if var clashes with the scripted
# FileCheck names for check_prefix, i.e. if var is check_prefix followed by
# one or more digits, compared case-insensitively. An empty check_prefix is
# returned as-is (falsy); without a clash the result is None.
def may_clash_with_default_check_prefix_name(check_prefix, var):
  if not check_prefix:
    return check_prefix
  scripted_name_re = r'^' + check_prefix + r'[0-9]+?$'
  return re.match(scripted_name_re, var, re.IGNORECASE)
566
# Create a FileCheck variable name based on an IR name: '!' is removed, '.'
# and '-' become '_', and the result is upper-cased.
def get_value_name(var, check_prefix):
  name = var.replace('!', '')
  if name.isdigit():
    # This is a nameless value, prepend check_prefix.
    name = check_prefix + name
  elif may_clash_with_default_check_prefix_name(check_prefix, name) and _prefix_filecheck_ir_name:
    # This is a named value that clashes with the check_prefix, prepend with
    # _prefix_filecheck_ir_name, if it has been defined.
    name = _prefix_filecheck_ir_name + name
  return name.replace('.', '_').replace('-', '_').upper()
581
# Create the FileCheck text that defines a variable for the matched IR value.
# Locals produce '[[NAME:<prefix><regexp>]]' (the prefix is part of the
# captured text); all other kinds produce '<prefix>[[NAME:<regexp>]]'.
def get_value_definition(var, match):
  name = get_value_name(var, get_nameless_check_prefix_from_ir_value_match(match))
  ir_prefix = get_ir_prefix_from_ir_value_match(match)[0]
  value_re = get_ir_prefix_from_ir_value_re_match(match)
  # for backwards compatibility we check locals with '.*'
  if is_local_def_ir_value_match(match):
    return '[[' + name + ':' + ir_prefix + value_re + ']]'
  return ir_prefix + '[[' + name + ':' + value_re + ']]'
590
# Create the FileCheck text that uses an already defined variable: '[[NAME]]'
# for locals, '<prefix>[[NAME]]' for all other kinds of IR value.
def get_value_use(var, match, check_prefix):
  use = '[[' + get_value_name(var, check_prefix) + ']]'
  if is_local_def_ir_value_match(match):
    return use
  return get_ir_prefix_from_ir_value_match(match)[0] + use
597
# Replace IR value defs and uses with FileCheck variables.
#
# `lines` is modified in place and also returned. For every line, '%.' is
# rewritten to '%dot', integer literals of globals matching one of the
# _global_hex_value_regex patterns are turned into hex '[[#...]]' expressions,
# and IR comments are removed. Unless is_analyze is set, each IR value is then
# replaced by a FileCheck variable definition (first occurrence) or use.
# vars_seen (a set) tracks locals; global_vars_seen (a dict mapping key to
# check prefix) tracks all other values. Both are updated.
def generalize_check_lines(lines, is_analyze, vars_seen, global_vars_seen):
  # This gets called for each match that occurs in
  # a line. We transform variables we haven't seen
  # into defs, and variables we have seen into uses.
  def transform_line_vars(match):
    # The returned pair is currently unused.
    _ir_prefix, _check_prefix = get_ir_prefix_from_ir_value_match(match)
    var = get_name_from_ir_value_match(match)
    for kind in nameless_values:
      if may_clash_with_default_check_prefix_name(kind.check_prefix, var):
        warn("Change IR value name '%s' or use --prefix-filecheck-ir-name to prevent possible conflict"
          " with scripted FileCheck name." % (var,))
    key = (var, get_check_key_from_ir_value_match(match))
    if is_local_def_ir_value_match(match):
      if key in vars_seen:
        replacement = get_value_use(var, match, get_nameless_check_prefix_from_ir_value_match(match))
      else:
        vars_seen.add(key)
        replacement = get_value_definition(var, match)
    elif key in global_vars_seen:
      replacement = get_value_use(var, match, global_vars_seen[key])
    else:
      global_vars_seen[key] = get_nameless_check_prefix_from_ir_value_match(match)
      replacement = get_value_definition(var, match)
    # re.sub replaces the entire regex match
    # with whatever you return, so we have
    # to make sure to hand it back everything
    # including the commas and spaces.
    return match.group(1) + replacement + match.group(match.lastindex)

  # Rewrite 'i<bits> <decimal>' as 'i<bits> [[#<hex>]]'.
  def encode_int_as_hex(m):
    return 'i' + m.group(1) + ' [[#' + hex(int(m.group(2))) + ']]'

  for index, original in enumerate(lines):
    # An IR variable named '%.' matches the FileCheck regex string.
    text = original.replace('%.', '%dot')
    for regex in _global_hex_value_regex:
      if re.match('^@' + regex + ' = ', text):
        text = re.sub(r'\bi([0-9]+) ([0-9]+)', encode_int_as_hex, text)
        break
    # Ignore any comments, since the check lines will too.
    text = SCRUB_IR_COMMENT_RE.sub(r'', text)
    if not is_analyze:
      # It can happen that two matches are back-to-back and for some reason sub
      # will not replace both of them. For now we work around this by
      # substituting until there is no more match.
      while True:
        text, replaced = IR_VALUE_RE.subn(transform_line_vars, text, count=1)
        if not replaced:
          break
    lines[index] = text
  return lines
650
651
def add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, is_asm, is_analyze, global_vars_seen_dict):
  """Append the FileCheck lines for func_name to output_lines.

  Every entry of prefix_list is visited once. Its check prefixes are walked in
  order: the walk stops at the first prefix that was already printed, skips
  prefixes that are excluded or have no output for func_name, and emits one
  group of check lines for the first remaining prefix before moving on to the
  next entry.

  Args:
    output_lines: list that receives the generated lines (modified in place).
    comment_marker: string put at the start of every generated line.
    prefix_list: sequence of items whose element 0 is an iterable of check
      prefixes.
    func_dict: mapping check prefix -> mapping function name -> function
      info. The info object is read through its `attrs` and `args_and_sig`
      attributes and through str(), which yields the function body. A falsy
      info object means "no output for this prefix".
    func_name: name of the function to emit checks for.
    check_label_format: %-style format taking (checkprefix, func_name,
      args_and_sig) and producing the label line.
    is_asm: when true the body lines are emitted as they are, with -EMPTY
      lines for blank lines, and no trailing separator is added.
    is_analyze: forwarded unchanged to generalize_check_lines.
    global_vars_seen_dict: mapping check prefix -> dict of global values
      already seen under that prefix; entries are created and extended in
      place (the asm path only creates missing entries).
  """
  # prefix_exclusions are prefixes we cannot use to print the function because
  # it doesn't exist in run lines that use these prefixes as well.
  prefix_exclusions = set()
  printed_prefixes = []
  for p in prefix_list:
    checkprefixes = p[0]
    # If not all checkprefixes of this run line produced the function we cannot
    # check for it as it does not exist for this run line. A subset of the
    # check prefixes might know about the function but only because other run
    # lines created it.
    if any(func_name not in func_dict[checkprefix] for checkprefix in checkprefixes):
      prefix_exclusions |= set(checkprefixes)

  # prefix_exclusions is constructed, we can now emit the output
  for p in prefix_list:
    global_vars_seen = {}
    checkprefixes = p[0]
    for checkprefix in checkprefixes:
      # Collect what earlier functions recorded for this prefix; this also
      # guarantees that global_vars_seen_dict has an entry for checkprefix.
      if checkprefix in global_vars_seen_dict:
        global_vars_seen.update(global_vars_seen_dict[checkprefix])
      else:
        global_vars_seen_dict[checkprefix] = {}
      if checkprefix in printed_prefixes:
        break

      # Check if the prefix is excluded.
      if checkprefix in prefix_exclusions:
        continue

      # If we do not have output for this prefix we skip it.
      if not func_dict[checkprefix][func_name]:
        continue

      # Add some space between different check prefixes, but not after the last
      # check line (before the test code).
      if is_asm:
        if len(printed_prefixes) != 0:
          output_lines.append(comment_marker)

      # Snapshot of the keys known before this function is generalized, so
      # that only newly discovered globals are recorded afterwards.
      global_vars_seen_before = set(global_vars_seen)

      vars_seen = set()
      printed_prefixes.append(checkprefix)
      attrs = str(func_dict[checkprefix][func_name].attrs)
      attrs = '' if attrs == 'None' else attrs
      if attrs:
        output_lines.append('%s %s: Function Attrs: %s' % (comment_marker, checkprefix, attrs))
      args_and_sig = str(func_dict[checkprefix][func_name].args_and_sig)
      args_and_sig = generalize_check_lines([args_and_sig], is_analyze, vars_seen, global_vars_seen)[0]
      if '[[' in args_and_sig:
        # The signature contains FileCheck variables: keep the label line
        # free of them and check the signature on a separate -SAME line.
        output_lines.append(check_label_format % (checkprefix, func_name, ''))
        output_lines.append('%s %s-SAME: %s' % (comment_marker, checkprefix, args_and_sig))
      else:
        output_lines.append(check_label_format % (checkprefix, func_name, args_and_sig))
      func_body = str(func_dict[checkprefix][func_name]).splitlines()

      # For ASM output, just emit the check lines.
      if is_asm:
        output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
        for func_line in func_body[1:]:
          if func_line.strip() == '':
            output_lines.append('%s %s-EMPTY:' % (comment_marker, checkprefix))
          else:
            output_lines.append('%s %s-NEXT:  %s' % (comment_marker, checkprefix, func_line))
        break

      # For IR output, change all defs to FileCheck variables, so we're immune
      # to variable naming fashions.
      func_body = generalize_check_lines(func_body, is_analyze, vars_seen, global_vars_seen)

      # This could be selectively enabled with an optional invocation argument.
      # Disabled for now: better to check everything. Be safe rather than sorry.

      # Handle the first line of the function body as a special case because
      # it's often just noise (a useless asm comment or entry label).
      #if func_body[0].startswith("#") or func_body[0].startswith("entry:"):
      #  is_blank_line = True
      #else:
      #  output_lines.append('%s %s:       %s' % (comment_marker, checkprefix, func_body[0]))
      #  is_blank_line = False

      is_blank_line = False

      for func_line in func_body:
        if func_line.strip() == '':
          is_blank_line = True
          continue
        # Do not waste time checking IR comments.
        func_line = SCRUB_IR_COMMENT_RE.sub(r'', func_line)

        # Skip blank lines instead of checking them: the line following a
        # blank line gets a plain check instead of a -NEXT check.
        if is_blank_line:
          output_lines.append('{} {}:       {}'.format(
              comment_marker, checkprefix, func_line))
        else:
          output_lines.append('{} {}-NEXT:  {}'.format(
              comment_marker, checkprefix, func_line))
        is_blank_line = False

      # Add space between different check prefixes and also before the first
      # line of code in the test function.
      output_lines.append(comment_marker)

      # Remember new global variables we have not seen before
      for key in global_vars_seen:
        if key not in global_vars_seen_before:
          global_vars_seen_dict[checkprefix][key] = global_vars_seen[key]
      break
763
def add_ir_checks(output_lines, comment_marker, prefix_list, func_dict,
                  func_name, preserve_names, function_sig, global_vars_seen_dict):
  """Emit IR check lines for func_name through add_checks.

  The label line has the form '<comment_marker> <prefix>-LABEL: @<name><sig>'.
  When function_sig is true, a 'define {{[^@]+}}' pattern is placed in front
  of the '@'. preserve_names is handed to add_checks as its is_analyze
  argument, and is_asm is always False.
  """
  if function_sig:
    function_def_regex = 'define {{[^@]+}}'
  else:
    function_def_regex = ''
  # Label format is based on IR string.
  check_label_format = '{} %s-LABEL: {}@%s%s'.format(comment_marker, function_def_regex)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format,
             is_asm=False,
             is_analyze=preserve_names,
             global_vars_seen_dict=global_vars_seen_dict)
771
def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name):
  """Emit analysis check lines for func_name through add_checks.

  The label line quotes the function name in single quotes. add_checks is
  called with is_asm=False, is_analyze=True and a fresh, empty dictionary of
  seen global values, so nothing is shared between calls.
  """
  check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker)
  add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name,
             check_label_format,
             is_asm=False,
             is_analyze=True,
             global_vars_seen_dict={})
777
def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes):
  """Record the global value definitions found in raw_tool_output.

  For every entry of nameless_values that has a global_ir_prefix, all lines of
  raw_tool_output of the form '<lhs> = <rhs>' (as described by the entry's
  regular expressions) are collected and stored in
  glob_val_dict[prefix][entry.check_prefix] for each prefix in prefixes.

  Prefixes whose glob_val_dict entry is None are left alone. If a prefix
  already holds different lines for the same kind of value, the stored value
  is replaced by None, except for the last prefix in prefixes, where a
  warning is issued and the new lines are stored.
  """
  for value_kind in nameless_values:
    if value_kind.global_ir_prefix is None:
      continue

    lhs_pattern = value_kind.global_ir_prefix + value_kind.global_ir_prefix_regexp
    rhs_pattern = value_kind.global_ir_rhs_regexp
    definition_re = re.compile(r'^' + lhs_pattern + r'\s=\s' + rhs_pattern + r'$',
                               flags=(re.M))
    lines = [m.group(0) for m in definition_re.finditer(raw_tool_output)]

    for prefix in prefixes:
      per_prefix = glob_val_dict[prefix]
      if per_prefix is None:
        continue
      if value_kind.check_prefix in per_prefix:
        # Identical output under the same prefix: nothing to do.
        if lines == per_prefix[value_kind.check_prefix]:
          continue
        if prefix != prefixes[-1]:
          # Conflicting output: this prefix cannot be used for these values.
          per_prefix[value_kind.check_prefix] = None
          continue
        warn('Found conflicting asm under the same prefix: %r!' % (prefix,))
      per_prefix[value_kind.check_prefix] = lines
804
def add_global_checks(glob_val_dict, comment_marker, prefix_list, output_lines, global_vars_seen_dict, is_analyze, is_before_functions):
  """Append check lines for global values to output_lines.

  Only entries of nameless_values that have a global_ir_prefix and whose
  is_before_functions flag equals the is_before_functions argument are
  handled. For each such entry and each item of prefix_list, the item's check
  prefixes are walked in order; the walk stops at a prefix that was already
  printed for this kind of value, and otherwise emits one separator-led group
  of check lines for the first prefix that yields any.

  When _global_value_regex is non-empty, only declarations matching one of
  its expressions (anchored as '@<regex> = ') are checked. Newly seen global
  values are stored in global_vars_seen_dict[checkprefix]. A closing separator
  line is appended if anything was printed.
  """
  emitted = set()
  for value_kind in nameless_values:
    if value_kind.global_ir_prefix is None:
      continue
    if value_kind.is_before_functions != is_before_functions:
      continue
    for run in prefix_list:
      seen_globals = {}
      run_prefixes = run[0]
      if run_prefixes is None:
        continue
      for checkprefix in run_prefixes:
        if checkprefix in global_vars_seen_dict:
          seen_globals.update(global_vars_seen_dict[checkprefix])
        else:
          global_vars_seen_dict[checkprefix] = {}
        if (checkprefix, value_kind.check_prefix) in emitted:
          break
        # Skip prefixes with no (or only empty) recorded values of this kind.
        if (not glob_val_dict[checkprefix]
            or value_kind.check_prefix not in glob_val_dict[checkprefix]
            or not glob_val_dict[checkprefix][value_kind.check_prefix]):
          continue

        known_before = list(seen_globals.keys())
        check_lines = []
        for declaration in glob_val_dict[checkprefix][value_kind.check_prefix]:
          if _global_value_regex and not any(
              re.match('^@' + regex + ' = ', declaration)
              for regex in _global_value_regex):
            continue
          generalized = generalize_check_lines([declaration], is_analyze, set(), seen_globals)
          check_lines.append('%s %s: %s' % (comment_marker, checkprefix, generalized[0]))
        if not check_lines:
          continue

        output_lines.append(comment_marker + SEPARATOR)
        output_lines.extend(check_lines)

        emitted.add((checkprefix, value_kind.check_prefix))

        # Remember new global variables we have not seen before
        for key in seen_globals:
          if key not in known_before:
            global_vars_seen_dict[checkprefix][key] = seen_globals[key]
        break

  if emitted:
    output_lines.append(comment_marker + SEPARATOR)
862
863
def check_prefix(prefix):
  """Warn if prefix does not match PREFIX_RE.

  When the rejected prefix contains a comma, the warning also suggests the
  '--check-prefixes=' spelling. Nothing is returned and nothing is raised for
  an invalid prefix; the problem is only reported through warn().
  """
  if PREFIX_RE.match(prefix):
    return
  message = ("Supplied prefix '%s' is invalid. Prefix must contain only alphanumeric characters, hyphens and underscores." %
             (prefix,))
  if ',' in prefix:
    # The hint embeds the raw prefix, so it is appended after the %-formatting
    # above. Formatting it together with the template would treat any '%' in
    # the prefix as a conversion specifier.
    message += " Did you mean '--check-prefixes=" + prefix + "'?"
  warn(message)
871
872
def verify_filecheck_prefixes(fc_cmd):
  """Validate the check prefixes named on a FileCheck command line.

  fc_cmd is split on whitespace. For a word containing 'check-prefix=' the
  text after the first '=' is passed to check_prefix. For a word containing
  'check-prefixes=' that text is split on commas, each item is passed to
  check_prefix, and a warning is issued for every item that occurs more than
  once in the list.
  """
  for word in fc_cmd.split():
    if "check-prefix=" in word:
      check_prefix(word.split('=', 1)[1])
      continue
    if "check-prefixes=" not in word:
      continue
    names = word.split('=', 1)[1].split(',')
    for name in names:
      check_prefix(name)
      if names.count(name) > 1:
        warn("Supplied prefix '%s' is not unique in the prefix list." % (name,))
885
886
def get_autogennote_suffix(parser, args):
  """Return the UTC_ARGS suffix describing the non-default options in args.

  Walks the actions registered on parser and keeps those whose value in args
  differs from the parser default. Destinations naming tests, binaries or
  verbosity are never reported. For actions that store a constant, only the
  action whose constant equals the current value is considered. Actions that
  take a parameter contribute their value after the option string; values of
  nargs='+' options are individually wrapped in double quotes.

  Returns an empty string when nothing needs to be recorded, otherwise
  ' <UTC_ARGS_KEY> <options>'.
  """
  skipped_dests = ('tests', 'update_only', 'opt_binary', 'llc_binary',
                   'clang', 'opt', 'llvm_bin', 'verbose')
  pieces = []
  for action in parser._actions:
    # Options such as --help are not included in args.
    if not hasattr(args, action.dest):
      continue
    # Paths to binaries, the list of tests and the like are not recorded.
    if action.dest in skipped_dests:
      continue
    value = getattr(args, action.dest)
    takes_parameter = action.const is None
    # With boolean --foo/--no-foo pairs only the action whose constant is the
    # current value is relevant.
    if not takes_parameter and value != action.const:
      continue
    # Default values are not recorded.
    if parser.get_default(action.dest) == value:
      continue
    pieces.append(action.option_strings[0])
    if takes_parameter:
      if action.nargs == '+':
        value = ' '.join('"' + v.strip('"') + '"' for v in value)
      pieces.append('%s' % value)
  if not pieces:
    return ''
  return ' %s %s' % (UTC_ARGS_KEY, ' '.join(pieces))
912
913
def check_for_command(line, parser, args, argv, argparse_callback):
  """Apply a UTC_ARGS command found in line, if there is one.

  When line matches UTC_ARGS_CMD, the space-separated options of its 'cmd'
  group are appended to argv (in place), the arguments are parsed again with
  every entry of args.tests left out, and argparse_callback (unless None) is
  called with the new arguments.

  Returns the tuple (args, argv); args is the input value when line holds no
  command.
  """
  cmd_m = UTC_ARGS_CMD.match(line)
  if not cmd_m:
    return args, argv
  # Splitting on single spaces yields empty strings for repeated spaces;
  # those are dropped.
  argv.extend(option for option in cmd_m.group('cmd').strip().split(' ') if option)
  test_paths = args.tests
  args = parser.parse_args([arg for arg in argv if arg not in test_paths])
  if argparse_callback is not None:
    argparse_callback(args)
  return args, argv
924
def find_arg_in_test(test_info, get_arg_to_check, arg_string, is_global):
  """Look up an option value for a test, optionally searching its lines.

  Args:
    test_info: object providing `args` and, for global options,
      `ro_iterlines()`, whose items expose `line` and `args`.
    get_arg_to_check: callable extracting the option value from an args
      object.
    arg_string: option name used in the warning text.
    is_global: when true and the option is not set in test_info.args, the
      per-line args are searched and the first truthy value is used.

  Returns:
    The value found, or the last falsy value obtained when nothing is set.

  A warning is printed to stderr when a global option is found only after a
  line that is neither blank nor starts with ';' has been seen.
  """
  result = get_arg_to_check(test_info.args)
  if not result and is_global:
    # See if this has been specified via UTC_ARGS.  This is a "global" option
    # that affects the entire generation of test checks.  If it exists anywhere
    # in the test, apply it to everything.
    saw_line = False
    for line_info in test_info.ro_iterlines():
      line = line_info.line
      if not line.startswith(';') and line.strip() != '':
        saw_line = True
      result = get_arg_to_check(line_info.args)
      if result:
        if saw_line:
          # We saw the option after already reading some test input lines.
          # Warn about it.
          print('WARNING: Found {} in line following test start: '.format(arg_string)
                + line, file=sys.stderr)
          print('WARNING: Consider moving {} to top of file'.format(arg_string),
                file=sys.stderr)
        break
  return result
947
def dump_input_lines(output_lines, test_info, prefix_set, comment_string):
  """Copy the test's input lines to output_lines, dropping old check lines.

  Lines are taken from test_info.iterlines(output_lines). A line is dropped
  when, after stripping whitespace, it is just the comment string or the
  comment string followed by SEPARATOR, or when it starts with the comment
  string, matches CHECK_RE and its first group is in prefix_set. Every other
  line is appended without its trailing newline characters.
  """
  for input_line_info in test_info.iterlines(output_lines):
    line = input_line_info.line
    stripped = line.strip()
    # Bare comment lines and separator lines are regenerated with the checks.
    if stripped == comment_string or stripped == comment_string + SEPARATOR:
      continue
    if line.lstrip().startswith(comment_string):
      m = CHECK_RE.match(line)
      if m and m.group(1) in prefix_set:
        continue
    output_lines.append(line.rstrip('\n'))
961
def add_checks_at_end(output_lines, prefix_list, func_order,
                      comment_string, check_generator):
  # Emits checks for functions grouped by check prefix: for every run-list
  # entry in prefix_list and every prefix of that entry, check_generator is
  # called once per function listed in func_order[prefix].
  #
  #   output_lines:    list receiving the generated lines (passed on to
  #                    check_generator and appended to here).
  #   prefix_list:     items with the check prefixes at index 0 and the tool
  #                    arguments at index 1.
  #   func_order:      indexable by prefix, yielding the functions to emit.
  #   comment_string:  appended as a separator line before every function
  #                    except the very first one.
  #   check_generator: callable taking (output_lines, run_list, func).
  added = set()
  for prefix in prefix_list:
    prefixes = prefix[0]
    tool_args = prefix[1]
    # Note: the inner loop variable reuses the name 'prefix'; the outer item
    # has already been unpacked above, so it is not needed any more.
    for prefix in prefixes:
      for func in func_order[prefix]:
        # 'added' is empty only before the first function is handled.
        if added:
          output_lines.append(comment_string)
        added.add(func)

        # The add_*_checks routines expect a run list whose items are
        # tuples that have a list of prefixes as their first element and
        # tool command args string as their second element.  They output
        # checks for each prefix in the list of prefixes.  By doing so, it
        # implicitly assumes that for each function every run line will
        # generate something for that function.  That is not the case for
        # generated functions as some run lines might not generate them
        # (e.g. -fopenmp vs. no -fopenmp).
        #
        # Therefore, pass just the prefix we're interested in.  This has
        # the effect of generating all of the checks for functions of a
        # single prefix before moving on to the next prefix.  So checks
        # are ordered by prefix instead of by function as in "normal"
        # mode.
        check_generator(output_lines,
                        [([prefix], tool_args)],
                        func)
991