xref: /llvm-project/llvm/utils/update_mca_test_checks.py (revision dee7bfdb9fb839973b9635530a270189bb7d94b3)
1#!/usr/bin/env python2.7
2
3"""A test case update script.
4
5This script is a utility to update LLVM 'llvm-mca' based test cases with new
6FileCheck patterns.
7"""
8
9import argparse
10from collections import defaultdict
11import glob
12import os
13import sys
14import warnings
15
16from UpdateTestChecks import common
17
18
19COMMENT_CHAR = '#'
20ADVERT_PREFIX = '{} NOTE: Assertions have been autogenerated by '.format(
21    COMMENT_CHAR)
22ADVERT = '{}utils/{}'.format(ADVERT_PREFIX, os.path.basename(__file__))
23
24
25class Error(Exception):
26  """ Generic Error that can be raised without printing a traceback.
27  """
28  pass
29
30
31def _warn(msg):
32  """ Log a user warning to stderr.
33  """
34  warnings.warn(msg, Warning, stacklevel=2)
35
36
37def _configure_warnings(args):
38  warnings.resetwarnings()
39  if args.w:
40    warnings.simplefilter('ignore')
41  if args.Werror:
42    warnings.simplefilter('error')
43
44
45def _showwarning(message, category, filename, lineno, file=None, line=None):
46  """ Version of warnings.showwarning that won't attempt to print out the
47      line at the location of the warning if the line text is not explicitly
48      specified.
49  """
50  if file is None:
51    file = sys.stderr
52  if line is None:
53    line = ''
54  file.write(warnings.formatwarning(message, category, filename, lineno, line))
55
56
57def _parse_args():
58  parser = argparse.ArgumentParser(description=__doc__)
59  parser.add_argument('-v', '--verbose',
60                      action='store_true',
61                      help='show verbose output')
62  parser.add_argument('-w',
63                      action='store_true',
64                      help='suppress warnings')
65  parser.add_argument('-Werror',
66                      action='store_true',
67                      help='promote warnings to errors')
68  parser.add_argument('--llvm-mca-binary',
69                      metavar='<path>',
70                      default='llvm-mca',
71                      help='the binary to use to generate the test case '
72                           '(default: llvm-mca)')
73  parser.add_argument('tests',
74                      metavar='<test-path>',
75                      nargs='+')
76  args = parser.parse_args()
77
78  _configure_warnings(args)
79
80  if not args.llvm_mca_binary:
81    raise Error('--llvm-mca-binary value cannot be empty string')
82
83  if 'llvm-mca' not in os.path.basename(args.llvm_mca_binary):
84    _warn('unexpected binary name: {}'.format(args.llvm_mca_binary))
85
86  return args
87
88
89def _find_run_lines(input_lines, args):
90  raw_lines = [m.group(1)
91               for m in [common.RUN_LINE_RE.match(l) for l in input_lines]
92               if m]
93  run_lines = [raw_lines[0]] if len(raw_lines) > 0 else []
94  for l in raw_lines[1:]:
95    if run_lines[-1].endswith(r'\\'):
96      run_lines[-1] = run_lines[-1].rstrip('\\') + ' ' + l
97    else:
98      run_lines.append(l)
99
100  if args.verbose:
101    sys.stderr.write('Found {} RUN line{}:\n'.format(
102        len(run_lines), '' if len(run_lines) == 1 else 's'))
103    for line in run_lines:
104      sys.stderr.write('  RUN: {}\n'.format(line))
105
106  return run_lines
107
108
109def _get_run_infos(run_lines, args):
110  run_infos = []
111  for run_line in run_lines:
112    try:
113      (tool_cmd, filecheck_cmd) = tuple([cmd.strip()
114                                        for cmd in run_line.split('|', 1)])
115    except ValueError:
116      _warn('could not split tool and filecheck commands: {}'.format(run_line))
117      continue
118
119    tool_basename = os.path.splitext(os.path.basename(args.llvm_mca_binary))[0]
120
121    if not tool_cmd.startswith(tool_basename + ' '):
122      _warn('skipping non-{} RUN line: {}'.format(tool_basename, run_line))
123      continue
124
125    if not filecheck_cmd.startswith('FileCheck '):
126      _warn('skipping non-FileCheck RUN line: {}'.format(run_line))
127      continue
128
129    tool_cmd_args = tool_cmd[len(tool_basename):].strip()
130    tool_cmd_args = tool_cmd_args.replace('< %s', '').replace('%s', '').strip()
131
132    check_prefixes = [item
133                      for m in common.CHECK_PREFIX_RE.finditer(filecheck_cmd)
134                      for item in m.group(1).split(',')]
135    if not check_prefixes:
136      check_prefixes = ['CHECK']
137
138    run_infos.append((check_prefixes, tool_cmd_args))
139
140  return run_infos
141
142
143def _break_down_block(block_info, common_prefix):
144  """ Given a block_info, see if we can analyze it further to let us break it
145      down by prefix per-line rather than per-block.
146  """
147  texts = block_info.keys()
148  prefixes = list(block_info.values())
149  # Split the lines from each of the incoming block_texts and zip them so that
150  # each element contains the corresponding lines from each text.  E.g.
151  #
152  # block_text_1: A   # line 1
153  #               B   # line 2
154  #
155  # block_text_2: A   # line 1
156  #               C   # line 2
157  #
158  # would become:
159  #
160  # [(A, A),   # line 1
161  #  (B, C)]   # line 2
162  #
163  line_tuples = list(zip(*list((text.splitlines() for text in texts))))
164
165  # To simplify output, we'll only proceed if the very first line of the block
166  # texts is common to each of them.
167  if len(set(line_tuples[0])) != 1:
168    return []
169
170  result = []
171  lresult = defaultdict(list)
172  for i, line in enumerate(line_tuples):
173    if len(set(line)) == 1:
174      # We're about to output a line with the common prefix.  This is a sync
175      # point so flush any batched-up lines one prefix at a time to the output
176      # first.
177      for prefix in sorted(lresult):
178        result.extend(lresult[prefix])
179      lresult = defaultdict(list)
180
181      # The line is common to each block so output with the common prefix.
182      result.append((common_prefix, line[0]))
183    else:
184      # The line is not common to each block, or we don't have a common prefix.
185      # If there are no prefixes available, warn and bail out.
186      if not prefixes[0]:
187        _warn('multiple lines not disambiguated by prefixes:\n{}\n'
188              'Some blocks may be skipped entirely as a result.'.format(
189                  '\n'.join('  - {}'.format(l) for l in line)))
190        return []
191
192      # Iterate through the line from each of the blocks and add the line with
193      # the corresponding prefix to the current batch of results so that we can
194      # later output them per-prefix.
195      for i, l in enumerate(line):
196        for prefix in prefixes[i]:
197          lresult[prefix].append((prefix, l))
198
199  # Flush any remaining batched-up lines one prefix at a time to the output.
200  for prefix in sorted(lresult):
201    result.extend(lresult[prefix])
202  return result
203
204
205def _get_useful_prefix_info(run_infos):
206  """ Given the run_infos, calculate any prefixes that are common to every one,
207      and the length of the longest prefix string.
208  """
209  try:
210    all_sets = [set(s) for s in list(zip(*run_infos))[0]]
211    common_to_all = set.intersection(*all_sets)
212    longest_prefix_len = max(len(p) for p in set.union(*all_sets))
213  except IndexError:
214    common_to_all = []
215    longest_prefix_len = 0
216  else:
217    if len(common_to_all) > 1:
218      _warn('Multiple prefixes common to all RUN lines: {}'.format(
219          common_to_all))
220    if common_to_all:
221      common_to_all = sorted(common_to_all)[0]
222  return common_to_all, longest_prefix_len
223
224
225def _align_matching_blocks(all_blocks, farthest_indexes):
226  """ Some sub-sequences of blocks may be common to multiple lists of blocks,
227      but at different indexes in each one.
228
229      For example, in the following case, A,B,E,F, and H are common to both
230      sets, but only A and B would be identified as such due to the indexes
231      matching:
232
233      index | 0 1 2 3 4 5 6
234      ------+--------------
235      setA  | A B C D E F H
236      setB  | A B E F G H
237
238      This function attempts to align the indexes of matching blocks by
239      inserting empty blocks into the block list. With this approach, A, B, E,
240      F, and H would now be able to be identified as matching blocks:
241
242      index | 0 1 2 3 4 5 6 7
243      ------+----------------
244      setA  | A B C D E F   H
245      setB  | A B     E F G H
246  """
247
248  # "Farthest block analysis": essentially, iterate over all blocks and find
249  # the highest index into a block list for the first instance of each block.
250  # This is relatively expensive, but we're dealing with small numbers of
251  # blocks so it doesn't make a perceivable difference to user time.
252  for blocks in all_blocks.values():
253    for block in blocks:
254      if not block:
255        continue
256
257      index = blocks.index(block)
258
259      if index > farthest_indexes[block]:
260        farthest_indexes[block] = index
261
262  # Use the results of the above analysis to identify any blocks that can be
263  # shunted along to match the farthest index value.
264  for blocks in all_blocks.values():
265    for index, block in enumerate(blocks):
266      if not block:
267        continue
268
269      changed = False
270      while(index < farthest_indexes[block]):
271        blocks.insert(index, '')
272        index += 1
273        changed = True
274
275      if changed:
276        # Bail out.  We'll need to re-do the farthest block analysis now that
277        # we've inserted some blocks.
278        return True
279
280  return False
281
282
283def _get_block_infos(run_infos, test_path, args, common_prefix):  # noqa
284  """ For each run line, run the tool with the specified args and collect the
285      output. We use the concept of 'blocks' for uniquing, where a block is
286      a series of lines of text with no more than one newline character between
287      each one.  For example:
288
289      This
290      is
291      one
292      block
293
294      This is
295      another block
296
297      This is yet another block
298
299      We then build up a 'block_infos' structure containing a dict where the
300      text of each block is the key and a list of the sets of prefixes that may
301      generate that particular block.  This then goes through a series of
302      transformations to minimise the amount of CHECK lines that need to be
303      written by taking advantage of common prefixes.
304  """
305
306  def _block_key(tool_args, prefixes):
307    """ Get a hashable key based on the current tool_args and prefixes.
308    """
309    return ' '.join([tool_args] + prefixes)
310
311  all_blocks = {}
312  max_block_len = 0
313
314  # A cache of the furthest-back position in any block list of the first
315  # instance of each block, indexed by the block itself.
316  farthest_indexes = defaultdict(int)
317
318  # Run the tool for each run line to generate all of the blocks.
319  for prefixes, tool_args in run_infos:
320    key = _block_key(tool_args, prefixes)
321    raw_tool_output = common.invoke_tool(args.llvm_mca_binary,
322                                         tool_args,
323                                         test_path)
324
325    # Replace any lines consisting of purely whitespace with empty lines.
326    raw_tool_output = '\n'.join(line if line.strip() else ''
327                                for line in raw_tool_output.splitlines())
328
329    # Split blocks, stripping all trailing whitespace, but keeping preceding
330    # whitespace except for newlines so that columns will line up visually.
331    all_blocks[key] = [b.lstrip('\n').rstrip()
332                       for b in raw_tool_output.split('\n\n')]
333    max_block_len = max(max_block_len, len(all_blocks[key]))
334
335    # Attempt to align matching blocks until no more changes can be made.
336    made_changes = True
337    while made_changes:
338      made_changes = _align_matching_blocks(all_blocks, farthest_indexes)
339
340  # If necessary, pad the lists of blocks with empty blocks so that they are
341  # all the same length.
342  for key in all_blocks:
343    len_to_pad = max_block_len - len(all_blocks[key])
344    all_blocks[key] += [''] * len_to_pad
345
346  # Create the block_infos structure where it is a nested dict in the form of:
347  # block number -> block text -> list of prefix sets
348  block_infos = defaultdict(lambda: defaultdict(list))
349  for prefixes, tool_args in run_infos:
350    key = _block_key(tool_args, prefixes)
351    for block_num, block_text in enumerate(all_blocks[key]):
352      block_infos[block_num][block_text].append(set(prefixes))
353
354  # Now go through the block_infos structure and attempt to smartly prune the
355  # number of prefixes per block to the minimal set possible to output.
356  for block_num in range(len(block_infos)):
357    # When there are multiple block texts for a block num, remove any
358    # prefixes that are common to more than one of them.
359    # E.g. [ [{ALL,FOO}] , [{ALL,BAR}] ] -> [ [{FOO}] , [{BAR}] ]
360    all_sets = [s for s in block_infos[block_num].values()]
361    pruned_sets = []
362
363    for i, setlist in enumerate(all_sets):
364      other_set_values = set([elem for j, setlist2 in enumerate(all_sets)
365                              for set_ in setlist2 for elem in set_
366                              if i != j])
367      pruned_sets.append([s - other_set_values for s in setlist])
368
369    for i, block_text in enumerate(block_infos[block_num]):
370
371      # When a block text matches multiple sets of prefixes, try removing any
372      # prefixes that aren't common to all of them.
373      # E.g. [ {ALL,FOO} , {ALL,BAR} ] -> [{ALL}]
374      common_values = set.intersection(*pruned_sets[i])
375      if common_values:
376        pruned_sets[i] = [common_values]
377
378      # Everything should be uniqued as much as possible by now.  Apply the
379      # newly pruned sets to the block_infos structure.
380      # If there are any blocks of text that still match multiple prefixes,
381      # output a warning.
382      current_set = set()
383      for s in pruned_sets[i]:
384        s = sorted(list(s))
385        if s:
386          current_set.add(s[0])
387          if len(s) > 1:
388            _warn('Multiple prefixes generating same output: {} '
389                  '(discarding {})'.format(','.join(s), ','.join(s[1:])))
390
391      if block_text and not current_set:
392        raise Error(
393          'block not captured by existing prefixes:\n\n{}'.format(block_text))
394      block_infos[block_num][block_text] = sorted(list(current_set))
395
396    # If we have multiple block_texts, try to break them down further to avoid
397    # the case where we have very similar block_texts repeated after each
398    # other.
399    if common_prefix and len(block_infos[block_num]) > 1:
400      # We'll only attempt this if each of the block_texts have the same number
401      # of lines as each other.
402      same_num_Lines = (len(set(len(k.splitlines())
403                                for k in block_infos[block_num].keys())) == 1)
404      if same_num_Lines:
405        breakdown = _break_down_block(block_infos[block_num], common_prefix)
406        if breakdown:
407          block_infos[block_num] = breakdown
408
409  return block_infos
410
411
412def _write_block(output, block, not_prefix_set, common_prefix, prefix_pad):
413  """ Write an individual block, with correct padding on the prefixes.
414      Returns a set of all of the prefixes that it has written.
415  """
416  end_prefix = ':     '
417  previous_prefix = None
418  num_lines_of_prefix = 0
419  written_prefixes = set()
420
421  for prefix, line in block:
422    if prefix in not_prefix_set:
423      _warn('not writing for prefix {0} due to presence of "{0}-NOT:" '
424            'in input file.'.format(prefix))
425      continue
426
427    # If the previous line isn't already blank and we're writing more than one
428    # line for the current prefix output a blank line first, unless either the
429    # current of previous prefix is common to all.
430    num_lines_of_prefix += 1
431    if prefix != previous_prefix:
432      if output and output[-1]:
433        if num_lines_of_prefix > 1 or any(p == common_prefix
434                                          for p in (prefix, previous_prefix)):
435          output.append('')
436      num_lines_of_prefix = 0
437      previous_prefix = prefix
438
439    written_prefixes.add(prefix)
440    output.append(
441        '{} {}{}{} {}'.format(COMMENT_CHAR,
442                              prefix,
443                              end_prefix,
444                              ' ' * (prefix_pad - len(prefix)),
445                              line).rstrip())
446    end_prefix = '-NEXT:'
447
448  output.append('')
449  return written_prefixes
450
451
452def _write_output(test_path, input_lines, prefix_list, block_infos,  # noqa
453                  args, common_prefix, prefix_pad):
454  prefix_set = set([prefix for prefixes, _ in prefix_list
455                    for prefix in prefixes])
456  not_prefix_set = set()
457
458  output_lines = []
459  for input_line in input_lines:
460    if input_line.startswith(ADVERT_PREFIX):
461      continue
462
463    if input_line.startswith(COMMENT_CHAR):
464      m = common.CHECK_RE.match(input_line)
465      try:
466        prefix = m.group(1)
467      except AttributeError:
468        prefix = None
469
470      if '{}-NOT:'.format(prefix) in input_line:
471        not_prefix_set.add(prefix)
472
473      if prefix not in prefix_set or prefix in not_prefix_set:
474        output_lines.append(input_line)
475        continue
476
477    if common.should_add_line_to_output(input_line, prefix_set):
478      # This input line of the function body will go as-is into the output.
479      # Except make leading whitespace uniform: 2 spaces.
480      input_line = common.SCRUB_LEADING_WHITESPACE_RE.sub(r'  ', input_line)
481
482      # Skip empty lines if the previous output line is also empty.
483      if input_line or output_lines[-1]:
484        output_lines.append(input_line)
485    else:
486      continue
487
488  # Add a blank line before the new checks if required.
489  if len(output_lines) > 0 and output_lines[-1]:
490    output_lines.append('')
491
492  output_check_lines = []
493  used_prefixes = set()
494  for block_num in range(len(block_infos)):
495    if type(block_infos[block_num]) is list:
496      # The block is of the type output from _break_down_block().
497      used_prefixes |= _write_block(output_check_lines,
498                                    block_infos[block_num],
499                                    not_prefix_set,
500                                    common_prefix,
501                                    prefix_pad)
502    else:
503      # _break_down_block() was unable to do do anything so output the block
504      # as-is.
505
506      # Rather than writing out each block as soon we encounter it, save it
507      # indexed by prefix so that we can write all of the blocks out sorted by
508      # prefix at the end.
509      output_blocks = defaultdict(list)
510
511      for block_text in sorted(block_infos[block_num]):
512
513        if not block_text:
514          continue
515
516        lines = block_text.split('\n')
517        for prefix in block_infos[block_num][block_text]:
518          assert prefix not in output_blocks
519          used_prefixes |= _write_block(output_blocks[prefix],
520                                        [(prefix, line) for line in lines],
521                                        not_prefix_set,
522                                        common_prefix,
523                                        prefix_pad)
524
525      for prefix in sorted(output_blocks):
526        output_check_lines.extend(output_blocks[prefix])
527
528  unused_prefixes = (prefix_set - not_prefix_set) - used_prefixes
529  if unused_prefixes:
530    raise Error('unused prefixes: {}'.format(sorted(unused_prefixes)))
531
532  if output_check_lines:
533    output_lines.insert(0, ADVERT)
534    output_lines.extend(output_check_lines)
535
536  # The file should not end with two newlines. It creates unnecessary churn.
537  while len(output_lines) > 0 and output_lines[-1] == '':
538    output_lines.pop()
539
540  if input_lines == output_lines:
541    sys.stderr.write('            [unchanged]\n')
542    return
543  sys.stderr.write('      [{} lines total]\n'.format(len(output_lines)))
544
545  if args.verbose:
546    sys.stderr.write(
547        'Writing {} lines to {}...\n\n'.format(len(output_lines), test_path))
548
549  with open(test_path, 'wb') as f:
550    f.writelines(['{}\n'.format(l).encode() for l in output_lines])
551
552def main():
553  args = _parse_args()
554  test_paths = [test for pattern in args.tests for test in glob.glob(pattern)]
555  for test_path in test_paths:
556    sys.stderr.write('Test: {}\n'.format(test_path))
557
558    # Call this per test. By default each warning will only be written once
559    # per source location. Reset the warning filter so that now each warning
560    # will be written once per source location per test.
561    _configure_warnings(args)
562
563    if args.verbose:
564      sys.stderr.write(
565          'Scanning for RUN lines in test file: {}\n'.format(test_path))
566
567    if not os.path.isfile(test_path):
568      raise Error('could not find test file: {}'.format(test_path))
569
570    with open(test_path) as f:
571      input_lines = [l.rstrip() for l in f]
572
573    run_lines = _find_run_lines(input_lines, args)
574    run_infos = _get_run_infos(run_lines, args)
575    common_prefix, prefix_pad = _get_useful_prefix_info(run_infos)
576    block_infos = _get_block_infos(run_infos, test_path, args, common_prefix)
577    _write_output(test_path,
578                  input_lines,
579                  run_infos,
580                  block_infos,
581                  args,
582                  common_prefix,
583                  prefix_pad)
584
585  return 0
586
587
588if __name__ == '__main__':
589  try:
590    warnings.showwarning = _showwarning
591    sys.exit(main())
592  except Error as e:
593    sys.stdout.write('error: {}\n'.format(e))
594    sys.exit(1)
595