xref: /netbsd-src/external/apache2/llvm/dist/llvm/utils/abtest.py (revision 82d56013d7b633d116a93943de88e08335357a7c)
17330f729Sjoerg#!/usr/bin/env python
27330f729Sjoerg#
37330f729Sjoerg# Given a previous good compile narrow down miscompiles.
47330f729Sjoerg# Expects two directories named "before" and "after" each containing a set of
57330f729Sjoerg# assembly or object files where the "after" version is assumed to be broken.
67330f729Sjoerg# You also have to provide a script called "link_test". It is called with a
77330f729Sjoerg# list of files which should be linked together and result tested. "link_test"
87330f729Sjoerg# should returns with exitcode 0 if the linking and testing succeeded.
97330f729Sjoerg#
10*82d56013Sjoerg# If a response file is provided, only the object files that are listed in the
11*82d56013Sjoerg# file are inspected. In addition, the "link_test" is called with a temporary
12*82d56013Sjoerg# response file representing one iteration of bisection.
13*82d56013Sjoerg#
147330f729Sjoerg# abtest.py operates by taking all files from the "before" directory and
157330f729Sjoerg# in each step replacing one of them with a file from the "bad" directory.
167330f729Sjoerg#
177330f729Sjoerg# Additionally you can perform the same steps with a single .s file. In this
187330f729Sjoerg# mode functions are identified by " -- Begin function FunctionName" and
197330f729Sjoerg# " -- End function" markers. The abtest.py then takes all
207330f729Sjoerg# function from the file in the "before" directory and replaces one function
217330f729Sjoerg# with the corresponding function from the "bad" file in each step.
227330f729Sjoerg#
237330f729Sjoerg# Example usage to identify miscompiled files:
247330f729Sjoerg#    1. Create a link_test script, make it executable. Simple Example:
257330f729Sjoerg#          clang "$@" -o /tmp/test && /tmp/test || echo "PROBLEM"
267330f729Sjoerg#    2. Run the script to figure out which files are miscompiled:
277330f729Sjoerg#       > ./abtest.py
287330f729Sjoerg#       somefile.s: ok
297330f729Sjoerg#       someotherfile.s: skipped: same content
307330f729Sjoerg#       anotherfile.s: failed: './link_test' exitcode != 0
317330f729Sjoerg#       ...
327330f729Sjoerg# Example usage to identify miscompiled functions inside a file:
337330f729Sjoerg#    3. Run the tests on a single file (assuming before/file.s and
347330f729Sjoerg#       after/file.s exist)
357330f729Sjoerg#       > ./abtest.py file.s
367330f729Sjoerg#       funcname1 [0/XX]: ok
377330f729Sjoerg#       funcname2 [1/XX]: ok
387330f729Sjoerg#       funcname3 [2/XX]: skipped: same content
397330f729Sjoerg#       funcname4 [3/XX]: failed: './link_test' exitcode != 0
407330f729Sjoerg#       ...
417330f729Sjoergfrom fnmatch import filter
427330f729Sjoergfrom sys import stderr
437330f729Sjoergimport argparse
447330f729Sjoergimport filecmp
457330f729Sjoergimport os
467330f729Sjoergimport subprocess
477330f729Sjoergimport sys
48*82d56013Sjoergimport tempfile
497330f729Sjoerg
50*82d56013Sjoerg# Specify LINKTEST via `--test`. Default value is './link_test'.
51*82d56013SjoergLINKTEST = ""
527330f729SjoergESCAPE = "\033[%sm"
537330f729SjoergBOLD = ESCAPE % "1"
547330f729SjoergRED = ESCAPE % "31"
557330f729SjoergNORMAL = ESCAPE % "0"
567330f729SjoergFAILED = RED + "failed" + NORMAL
577330f729Sjoerg
587330f729Sjoerg
597330f729Sjoergdef find(dir, file_filter=None):
607330f729Sjoerg    files = [
617330f729Sjoerg        walkdir[0]+"/"+file
627330f729Sjoerg        for walkdir in os.walk(dir)
637330f729Sjoerg        for file in walkdir[2]
647330f729Sjoerg    ]
657330f729Sjoerg    if file_filter is not None:
667330f729Sjoerg        files = filter(files, file_filter)
677330f729Sjoerg    return sorted(files)
687330f729Sjoerg
697330f729Sjoerg
707330f729Sjoergdef error(message):
717330f729Sjoerg    stderr.write("Error: %s\n" % (message,))
727330f729Sjoerg
737330f729Sjoerg
747330f729Sjoergdef warn(message):
757330f729Sjoerg    stderr.write("Warning: %s\n" % (message,))
767330f729Sjoerg
777330f729Sjoerg
787330f729Sjoergdef info(message):
797330f729Sjoerg    stderr.write("Info: %s\n" % (message,))
807330f729Sjoerg
817330f729Sjoerg
827330f729Sjoergdef announce_test(name):
837330f729Sjoerg    stderr.write("%s%s%s: " % (BOLD, name, NORMAL))
847330f729Sjoerg    stderr.flush()
857330f729Sjoerg
867330f729Sjoerg
877330f729Sjoergdef announce_result(result):
887330f729Sjoerg    stderr.write(result)
897330f729Sjoerg    stderr.write("\n")
907330f729Sjoerg    stderr.flush()
917330f729Sjoerg
927330f729Sjoerg
937330f729Sjoergdef format_namelist(l):
947330f729Sjoerg    result = ", ".join(l[0:3])
957330f729Sjoerg    if len(l) > 3:
967330f729Sjoerg        result += "... (%d total)" % len(l)
977330f729Sjoerg    return result
987330f729Sjoerg
997330f729Sjoerg
1007330f729Sjoergdef check_sanity(choices, perform_test):
1017330f729Sjoerg    announce_test("sanity check A")
1027330f729Sjoerg    all_a = {name: a_b[0] for name, a_b in choices}
1037330f729Sjoerg    res_a = perform_test(all_a)
1047330f729Sjoerg    if res_a is not True:
1057330f729Sjoerg        error("Picking all choices from A failed to pass the test")
1067330f729Sjoerg        sys.exit(1)
1077330f729Sjoerg
1087330f729Sjoerg    announce_test("sanity check B (expecting failure)")
1097330f729Sjoerg    all_b = {name: a_b[1] for name, a_b in choices}
1107330f729Sjoerg    res_b = perform_test(all_b)
1117330f729Sjoerg    if res_b is not False:
1127330f729Sjoerg        error("Picking all choices from B did unexpectedly pass the test")
1137330f729Sjoerg        sys.exit(1)
1147330f729Sjoerg
1157330f729Sjoerg
1167330f729Sjoergdef check_sequentially(choices, perform_test):
1177330f729Sjoerg    known_good = set()
1187330f729Sjoerg    all_a = {name: a_b[0] for name, a_b in choices}
1197330f729Sjoerg    n = 1
1207330f729Sjoerg    for name, a_b in sorted(choices):
1217330f729Sjoerg        picks = dict(all_a)
1227330f729Sjoerg        picks[name] = a_b[1]
1237330f729Sjoerg        announce_test("checking %s [%d/%d]" % (name, n, len(choices)))
1247330f729Sjoerg        n += 1
1257330f729Sjoerg        res = perform_test(picks)
1267330f729Sjoerg        if res is True:
1277330f729Sjoerg            known_good.add(name)
1287330f729Sjoerg    return known_good
1297330f729Sjoerg
1307330f729Sjoerg
1317330f729Sjoergdef check_bisect(choices, perform_test):
1327330f729Sjoerg    known_good = set()
1337330f729Sjoerg    if len(choices) == 0:
1347330f729Sjoerg        return known_good
1357330f729Sjoerg
1367330f729Sjoerg    choice_map = dict(choices)
1377330f729Sjoerg    all_a = {name: a_b[0] for name, a_b in choices}
1387330f729Sjoerg
1397330f729Sjoerg    def test_partition(partition, upcoming_partition):
1407330f729Sjoerg        # Compute the maximum number of checks we have to do in the worst case.
1417330f729Sjoerg        max_remaining_steps = len(partition) * 2 - 1
1427330f729Sjoerg        if upcoming_partition is not None:
1437330f729Sjoerg            max_remaining_steps += len(upcoming_partition) * 2 - 1
1447330f729Sjoerg        for x in partitions_to_split:
1457330f729Sjoerg            max_remaining_steps += (len(x) - 1) * 2
1467330f729Sjoerg
1477330f729Sjoerg        picks = dict(all_a)
1487330f729Sjoerg        for x in partition:
1497330f729Sjoerg            picks[x] = choice_map[x][1]
1507330f729Sjoerg        announce_test("checking %s [<=%d remaining]" %
1517330f729Sjoerg                      (format_namelist(partition), max_remaining_steps))
1527330f729Sjoerg        res = perform_test(picks)
1537330f729Sjoerg        if res is True:
1547330f729Sjoerg            known_good.update(partition)
1557330f729Sjoerg        elif len(partition) > 1:
1567330f729Sjoerg            partitions_to_split.insert(0, partition)
1577330f729Sjoerg
1587330f729Sjoerg    # TODO:
1597330f729Sjoerg    # - We could optimize based on the knowledge that when splitting a failed
1607330f729Sjoerg    #   partition into two and one side checks out okay then we can deduce that
1617330f729Sjoerg    #   the other partition must be a failure.
1627330f729Sjoerg    all_choice_names = [name for name, _ in choices]
1637330f729Sjoerg    partitions_to_split = [all_choice_names]
1647330f729Sjoerg    while len(partitions_to_split) > 0:
1657330f729Sjoerg        partition = partitions_to_split.pop()
1667330f729Sjoerg
1677330f729Sjoerg        middle = len(partition) // 2
1687330f729Sjoerg        left = partition[0:middle]
1697330f729Sjoerg        right = partition[middle:]
1707330f729Sjoerg
1717330f729Sjoerg        if len(left) > 0:
1727330f729Sjoerg            test_partition(left, right)
1737330f729Sjoerg        assert len(right) > 0
1747330f729Sjoerg        test_partition(right, None)
1757330f729Sjoerg
1767330f729Sjoerg    return known_good
1777330f729Sjoerg
1787330f729Sjoerg
1797330f729Sjoergdef extract_functions(file):
1807330f729Sjoerg    functions = []
1817330f729Sjoerg    in_function = None
1827330f729Sjoerg    for line in open(file):
1837330f729Sjoerg        marker = line.find(" -- Begin function ")
1847330f729Sjoerg        if marker != -1:
1857330f729Sjoerg            if in_function is not None:
1867330f729Sjoerg                warn("Missing end of function %s" % (in_function,))
1877330f729Sjoerg            funcname = line[marker + 19:-1]
1887330f729Sjoerg            in_function = funcname
1897330f729Sjoerg            text = line
1907330f729Sjoerg            continue
1917330f729Sjoerg
1927330f729Sjoerg        marker = line.find(" -- End function")
1937330f729Sjoerg        if marker != -1:
1947330f729Sjoerg            text += line
1957330f729Sjoerg            functions.append((in_function, text))
1967330f729Sjoerg            in_function = None
1977330f729Sjoerg            continue
1987330f729Sjoerg
1997330f729Sjoerg        if in_function is not None:
2007330f729Sjoerg            text += line
2017330f729Sjoerg    return functions
2027330f729Sjoerg
2037330f729Sjoerg
2047330f729Sjoergdef replace_functions(source, dest, replacements):
2057330f729Sjoerg    out = open(dest, "w")
2067330f729Sjoerg    skip = False
2077330f729Sjoerg    in_function = None
2087330f729Sjoerg    for line in open(source):
2097330f729Sjoerg        marker = line.find(" -- Begin function ")
2107330f729Sjoerg        if marker != -1:
2117330f729Sjoerg            if in_function is not None:
2127330f729Sjoerg                warn("Missing end of function %s" % (in_function,))
2137330f729Sjoerg            funcname = line[marker + 19:-1]
2147330f729Sjoerg            in_function = funcname
2157330f729Sjoerg            replacement = replacements.get(in_function)
2167330f729Sjoerg            if replacement is not None:
2177330f729Sjoerg                out.write(replacement)
2187330f729Sjoerg                skip = True
2197330f729Sjoerg        else:
2207330f729Sjoerg            marker = line.find(" -- End function")
2217330f729Sjoerg            if marker != -1:
2227330f729Sjoerg                in_function = None
2237330f729Sjoerg                if skip:
2247330f729Sjoerg                    skip = False
2257330f729Sjoerg                    continue
2267330f729Sjoerg
2277330f729Sjoerg        if not skip:
2287330f729Sjoerg            out.write(line)
2297330f729Sjoerg
2307330f729Sjoerg
2317330f729Sjoergdef testrun(files):
2327330f729Sjoerg    linkline = "%s %s" % (LINKTEST, " ".join(files),)
2337330f729Sjoerg    res = subprocess.call(linkline, shell=True)
2347330f729Sjoerg    if res != 0:
2357330f729Sjoerg        announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST)
2367330f729Sjoerg        return False
2377330f729Sjoerg    else:
2387330f729Sjoerg        announce_result("ok")
2397330f729Sjoerg        return True
2407330f729Sjoerg
2417330f729Sjoerg
242*82d56013Sjoergdef prepare_files(gooddir, baddir, rspfile):
243*82d56013Sjoerg    files_a = []
244*82d56013Sjoerg    files_b = []
245*82d56013Sjoerg
246*82d56013Sjoerg    if rspfile is not None:
247*82d56013Sjoerg        def get_basename(name):
248*82d56013Sjoerg            # remove prefix
249*82d56013Sjoerg            if name.startswith(gooddir):
250*82d56013Sjoerg                return name[len(gooddir):]
251*82d56013Sjoerg            if name.startswith(baddir):
252*82d56013Sjoerg                return name[len(baddir):]
253*82d56013Sjoerg            assert False, ""
254*82d56013Sjoerg
255*82d56013Sjoerg        with open(rspfile, "r") as rf:
256*82d56013Sjoerg            for line in rf.read().splitlines():
257*82d56013Sjoerg                for obj in line.split():
258*82d56013Sjoerg                    assert not os.path.isabs(obj), "TODO: support abs path"
259*82d56013Sjoerg                    files_a.append(gooddir + "/" + obj)
260*82d56013Sjoerg                    files_b.append(baddir + "/" + obj)
261*82d56013Sjoerg    else:
262*82d56013Sjoerg        get_basename = lambda name: os.path.basename(name)
2637330f729Sjoerg        files_a = find(gooddir, "*")
2647330f729Sjoerg        files_b = find(baddir, "*")
2657330f729Sjoerg
266*82d56013Sjoerg    basenames_a = set(map(get_basename, files_a))
267*82d56013Sjoerg    basenames_b = set(map(get_basename, files_b))
2687330f729Sjoerg
2697330f729Sjoerg    for name in files_b:
270*82d56013Sjoerg        basename = get_basename(name)
2717330f729Sjoerg        if basename not in basenames_a:
2727330f729Sjoerg            warn("There is no corresponding file to '%s' in %s" %
2737330f729Sjoerg                 (name, gooddir))
2747330f729Sjoerg    choices = []
2757330f729Sjoerg    skipped = []
2767330f729Sjoerg    for name in files_a:
277*82d56013Sjoerg        basename = get_basename(name)
2787330f729Sjoerg        if basename not in basenames_b:
2797330f729Sjoerg            warn("There is no corresponding file to '%s' in %s" %
2807330f729Sjoerg                 (name, baddir))
2817330f729Sjoerg
2827330f729Sjoerg        file_a = gooddir + "/" + basename
2837330f729Sjoerg        file_b = baddir + "/" + basename
2847330f729Sjoerg        if filecmp.cmp(file_a, file_b):
2857330f729Sjoerg            skipped.append(basename)
2867330f729Sjoerg            continue
2877330f729Sjoerg
2887330f729Sjoerg        choice = (basename, (file_a, file_b))
2897330f729Sjoerg        choices.append(choice)
2907330f729Sjoerg
2917330f729Sjoerg    if len(skipped) > 0:
2927330f729Sjoerg        info("Skipped (same content): %s" % format_namelist(skipped))
2937330f729Sjoerg
2947330f729Sjoerg    def perform_test(picks):
2957330f729Sjoerg        files = []
2967330f729Sjoerg        # Note that we iterate over files_a so we don't change the order
2977330f729Sjoerg        # (cannot use `picks` as it is a dictionary without order)
2987330f729Sjoerg        for x in files_a:
299*82d56013Sjoerg            basename = get_basename(x)
3007330f729Sjoerg            picked = picks.get(basename)
3017330f729Sjoerg            if picked is None:
3027330f729Sjoerg                assert basename in skipped
3037330f729Sjoerg                files.append(x)
3047330f729Sjoerg            else:
3057330f729Sjoerg                files.append(picked)
306*82d56013Sjoerg
307*82d56013Sjoerg        # If response file is used, create a temporary response file for the
308*82d56013Sjoerg        # picked files.
309*82d56013Sjoerg        if rspfile is not None:
310*82d56013Sjoerg            with tempfile.NamedTemporaryFile('w', suffix='.rsp',
311*82d56013Sjoerg                                             delete=False) as tf:
312*82d56013Sjoerg                tf.write(" ".join(files))
313*82d56013Sjoerg                tf.flush()
314*82d56013Sjoerg            ret = testrun([tf.name])
315*82d56013Sjoerg            os.remove(tf.name)
316*82d56013Sjoerg            return ret
317*82d56013Sjoerg
3187330f729Sjoerg        return testrun(files)
3197330f729Sjoerg
3207330f729Sjoerg    return perform_test, choices
3217330f729Sjoerg
3227330f729Sjoerg
3237330f729Sjoergdef prepare_functions(to_check, gooddir, goodfile, badfile):
3247330f729Sjoerg    files_good = find(gooddir, "*")
3257330f729Sjoerg
3267330f729Sjoerg    functions_a = extract_functions(goodfile)
3277330f729Sjoerg    functions_a_map = dict(functions_a)
3287330f729Sjoerg    functions_b_map = dict(extract_functions(badfile))
3297330f729Sjoerg
3307330f729Sjoerg    for name in functions_b_map.keys():
3317330f729Sjoerg        if name not in functions_a_map:
3327330f729Sjoerg            warn("Function '%s' missing from good file" % name)
3337330f729Sjoerg    choices = []
3347330f729Sjoerg    skipped = []
3357330f729Sjoerg    for name, candidate_a in functions_a:
3367330f729Sjoerg        candidate_b = functions_b_map.get(name)
3377330f729Sjoerg        if candidate_b is None:
3387330f729Sjoerg            warn("Function '%s' missing from bad file" % name)
3397330f729Sjoerg            continue
3407330f729Sjoerg        if candidate_a == candidate_b:
3417330f729Sjoerg            skipped.append(name)
3427330f729Sjoerg            continue
3437330f729Sjoerg        choice = name, (candidate_a, candidate_b)
3447330f729Sjoerg        choices.append(choice)
3457330f729Sjoerg
3467330f729Sjoerg    if len(skipped) > 0:
3477330f729Sjoerg        info("Skipped (same content): %s" % format_namelist(skipped))
3487330f729Sjoerg
3497330f729Sjoerg    combined_file = '/tmp/combined2.s'
3507330f729Sjoerg    files = []
3517330f729Sjoerg    found_good_file = False
3527330f729Sjoerg    for c in files_good:
3537330f729Sjoerg        if os.path.basename(c) == to_check:
3547330f729Sjoerg            found_good_file = True
3557330f729Sjoerg            files.append(combined_file)
3567330f729Sjoerg            continue
3577330f729Sjoerg        files.append(c)
3587330f729Sjoerg    assert found_good_file
3597330f729Sjoerg
3607330f729Sjoerg    def perform_test(picks):
3617330f729Sjoerg        for name, x in picks.items():
3627330f729Sjoerg            assert x == functions_a_map[name] or x == functions_b_map[name]
3637330f729Sjoerg        replace_functions(goodfile, combined_file, picks)
3647330f729Sjoerg        return testrun(files)
3657330f729Sjoerg    return perform_test, choices
3667330f729Sjoerg
3677330f729Sjoerg
3687330f729Sjoergdef main():
3697330f729Sjoerg    parser = argparse.ArgumentParser()
3707330f729Sjoerg    parser.add_argument('--a', dest='dir_a', default='before')
3717330f729Sjoerg    parser.add_argument('--b', dest='dir_b', default='after')
372*82d56013Sjoerg    parser.add_argument('--rsp', default=None)
373*82d56013Sjoerg    parser.add_argument('--test', default='./link_test')
3747330f729Sjoerg    parser.add_argument('--insane', help='Skip sanity check',
3757330f729Sjoerg                        action='store_true')
3767330f729Sjoerg    parser.add_argument('--seq',
3777330f729Sjoerg                        help='Check sequentially instead of bisection',
3787330f729Sjoerg                        action='store_true')
3797330f729Sjoerg    parser.add_argument('file', metavar='file', nargs='?')
3807330f729Sjoerg    config = parser.parse_args()
3817330f729Sjoerg
3827330f729Sjoerg    gooddir = config.dir_a
3837330f729Sjoerg    baddir = config.dir_b
384*82d56013Sjoerg    rspfile = config.rsp
385*82d56013Sjoerg    global LINKTEST
386*82d56013Sjoerg    LINKTEST = config.test
3877330f729Sjoerg
3887330f729Sjoerg    # Preparation phase: Creates a dictionary mapping names to a list of two
3897330f729Sjoerg    # choices each. The bisection algorithm will pick one choice for each name
3907330f729Sjoerg    # and then run the perform_test function on it.
3917330f729Sjoerg    if config.file is not None:
3927330f729Sjoerg        goodfile = gooddir + "/" + config.file
3937330f729Sjoerg        badfile = baddir + "/" + config.file
3947330f729Sjoerg        perform_test, choices = prepare_functions(config.file, gooddir,
3957330f729Sjoerg                                                  goodfile, badfile)
3967330f729Sjoerg    else:
397*82d56013Sjoerg        perform_test, choices = prepare_files(gooddir, baddir, rspfile)
3987330f729Sjoerg
3997330f729Sjoerg    info("%d bisection choices" % len(choices))
4007330f729Sjoerg
4017330f729Sjoerg    # "Checking whether build environment is sane ..."
4027330f729Sjoerg    if not config.insane:
4037330f729Sjoerg        if not os.access(LINKTEST, os.X_OK):
4047330f729Sjoerg            error("Expect '%s' to be present and executable" % (LINKTEST,))
4057330f729Sjoerg            exit(1)
4067330f729Sjoerg
4077330f729Sjoerg        check_sanity(choices, perform_test)
4087330f729Sjoerg
4097330f729Sjoerg    if config.seq:
4107330f729Sjoerg        known_good = check_sequentially(choices, perform_test)
4117330f729Sjoerg    else:
4127330f729Sjoerg        known_good = check_bisect(choices, perform_test)
4137330f729Sjoerg
4147330f729Sjoerg    stderr.write("")
4157330f729Sjoerg    if len(known_good) != len(choices):
4167330f729Sjoerg        stderr.write("== Failing ==\n")
4177330f729Sjoerg        for name, _ in choices:
4187330f729Sjoerg            if name not in known_good:
4197330f729Sjoerg                stderr.write("%s\n" % name)
4207330f729Sjoerg    else:
4217330f729Sjoerg        # This shouldn't happen when the sanity check works...
4227330f729Sjoerg        # Maybe link_test isn't deterministic?
4237330f729Sjoerg        stderr.write("Could not identify failing parts?!?")
4247330f729Sjoerg
4257330f729Sjoerg
4267330f729Sjoergif __name__ == '__main__':
4277330f729Sjoerg    main()
428