17330f729Sjoerg#!/usr/bin/env python 27330f729Sjoerg# 37330f729Sjoerg# Given a previous good compile narrow down miscompiles. 47330f729Sjoerg# Expects two directories named "before" and "after" each containing a set of 57330f729Sjoerg# assembly or object files where the "after" version is assumed to be broken. 67330f729Sjoerg# You also have to provide a script called "link_test". It is called with a 77330f729Sjoerg# list of files which should be linked together and result tested. "link_test" 87330f729Sjoerg# should returns with exitcode 0 if the linking and testing succeeded. 97330f729Sjoerg# 10*82d56013Sjoerg# If a response file is provided, only the object files that are listed in the 11*82d56013Sjoerg# file are inspected. In addition, the "link_test" is called with a temporary 12*82d56013Sjoerg# response file representing one iteration of bisection. 13*82d56013Sjoerg# 147330f729Sjoerg# abtest.py operates by taking all files from the "before" directory and 157330f729Sjoerg# in each step replacing one of them with a file from the "bad" directory. 167330f729Sjoerg# 177330f729Sjoerg# Additionally you can perform the same steps with a single .s file. In this 187330f729Sjoerg# mode functions are identified by " -- Begin function FunctionName" and 197330f729Sjoerg# " -- End function" markers. The abtest.py then takes all 207330f729Sjoerg# function from the file in the "before" directory and replaces one function 217330f729Sjoerg# with the corresponding function from the "bad" file in each step. 227330f729Sjoerg# 237330f729Sjoerg# Example usage to identify miscompiled files: 247330f729Sjoerg# 1. Create a link_test script, make it executable. Simple Example: 257330f729Sjoerg# clang "$@" -o /tmp/test && /tmp/test || echo "PROBLEM" 267330f729Sjoerg# 2. Run the script to figure out which files are miscompiled: 277330f729Sjoerg# > ./abtest.py 287330f729Sjoerg# somefile.s: ok 297330f729Sjoerg# someotherfile.s: skipped: same content 307330f729Sjoerg# anotherfile.s: failed: './link_test' exitcode != 0 317330f729Sjoerg# ... 327330f729Sjoerg# Example usage to identify miscompiled functions inside a file: 337330f729Sjoerg# 3. Run the tests on a single file (assuming before/file.s and 347330f729Sjoerg# after/file.s exist) 357330f729Sjoerg# > ./abtest.py file.s 367330f729Sjoerg# funcname1 [0/XX]: ok 377330f729Sjoerg# funcname2 [1/XX]: ok 387330f729Sjoerg# funcname3 [2/XX]: skipped: same content 397330f729Sjoerg# funcname4 [3/XX]: failed: './link_test' exitcode != 0 407330f729Sjoerg# ... 417330f729Sjoergfrom fnmatch import filter 427330f729Sjoergfrom sys import stderr 437330f729Sjoergimport argparse 447330f729Sjoergimport filecmp 457330f729Sjoergimport os 467330f729Sjoergimport subprocess 477330f729Sjoergimport sys 48*82d56013Sjoergimport tempfile 497330f729Sjoerg 50*82d56013Sjoerg# Specify LINKTEST via `--test`. Default value is './link_test'. 51*82d56013SjoergLINKTEST = "" 527330f729SjoergESCAPE = "\033[%sm" 537330f729SjoergBOLD = ESCAPE % "1" 547330f729SjoergRED = ESCAPE % "31" 557330f729SjoergNORMAL = ESCAPE % "0" 567330f729SjoergFAILED = RED + "failed" + NORMAL 577330f729Sjoerg 587330f729Sjoerg 597330f729Sjoergdef find(dir, file_filter=None): 607330f729Sjoerg files = [ 617330f729Sjoerg walkdir[0]+"/"+file 627330f729Sjoerg for walkdir in os.walk(dir) 637330f729Sjoerg for file in walkdir[2] 647330f729Sjoerg ] 657330f729Sjoerg if file_filter is not None: 667330f729Sjoerg files = filter(files, file_filter) 677330f729Sjoerg return sorted(files) 687330f729Sjoerg 697330f729Sjoerg 707330f729Sjoergdef error(message): 717330f729Sjoerg stderr.write("Error: %s\n" % (message,)) 727330f729Sjoerg 737330f729Sjoerg 747330f729Sjoergdef warn(message): 757330f729Sjoerg stderr.write("Warning: %s\n" % (message,)) 767330f729Sjoerg 777330f729Sjoerg 787330f729Sjoergdef info(message): 797330f729Sjoerg stderr.write("Info: %s\n" % (message,)) 807330f729Sjoerg 817330f729Sjoerg 827330f729Sjoergdef announce_test(name): 837330f729Sjoerg stderr.write("%s%s%s: " % (BOLD, name, NORMAL)) 847330f729Sjoerg stderr.flush() 857330f729Sjoerg 867330f729Sjoerg 877330f729Sjoergdef announce_result(result): 887330f729Sjoerg stderr.write(result) 897330f729Sjoerg stderr.write("\n") 907330f729Sjoerg stderr.flush() 917330f729Sjoerg 927330f729Sjoerg 937330f729Sjoergdef format_namelist(l): 947330f729Sjoerg result = ", ".join(l[0:3]) 957330f729Sjoerg if len(l) > 3: 967330f729Sjoerg result += "... (%d total)" % len(l) 977330f729Sjoerg return result 987330f729Sjoerg 997330f729Sjoerg 1007330f729Sjoergdef check_sanity(choices, perform_test): 1017330f729Sjoerg announce_test("sanity check A") 1027330f729Sjoerg all_a = {name: a_b[0] for name, a_b in choices} 1037330f729Sjoerg res_a = perform_test(all_a) 1047330f729Sjoerg if res_a is not True: 1057330f729Sjoerg error("Picking all choices from A failed to pass the test") 1067330f729Sjoerg sys.exit(1) 1077330f729Sjoerg 1087330f729Sjoerg announce_test("sanity check B (expecting failure)") 1097330f729Sjoerg all_b = {name: a_b[1] for name, a_b in choices} 1107330f729Sjoerg res_b = perform_test(all_b) 1117330f729Sjoerg if res_b is not False: 1127330f729Sjoerg error("Picking all choices from B did unexpectedly pass the test") 1137330f729Sjoerg sys.exit(1) 1147330f729Sjoerg 1157330f729Sjoerg 1167330f729Sjoergdef check_sequentially(choices, perform_test): 1177330f729Sjoerg known_good = set() 1187330f729Sjoerg all_a = {name: a_b[0] for name, a_b in choices} 1197330f729Sjoerg n = 1 1207330f729Sjoerg for name, a_b in sorted(choices): 1217330f729Sjoerg picks = dict(all_a) 1227330f729Sjoerg picks[name] = a_b[1] 1237330f729Sjoerg announce_test("checking %s [%d/%d]" % (name, n, len(choices))) 1247330f729Sjoerg n += 1 1257330f729Sjoerg res = perform_test(picks) 1267330f729Sjoerg if res is True: 1277330f729Sjoerg known_good.add(name) 1287330f729Sjoerg return known_good 1297330f729Sjoerg 1307330f729Sjoerg 1317330f729Sjoergdef check_bisect(choices, perform_test): 1327330f729Sjoerg known_good = set() 1337330f729Sjoerg if len(choices) == 0: 1347330f729Sjoerg return known_good 1357330f729Sjoerg 1367330f729Sjoerg choice_map = dict(choices) 1377330f729Sjoerg all_a = {name: a_b[0] for name, a_b in choices} 1387330f729Sjoerg 1397330f729Sjoerg def test_partition(partition, upcoming_partition): 1407330f729Sjoerg # Compute the maximum number of checks we have to do in the worst case. 1417330f729Sjoerg max_remaining_steps = len(partition) * 2 - 1 1427330f729Sjoerg if upcoming_partition is not None: 1437330f729Sjoerg max_remaining_steps += len(upcoming_partition) * 2 - 1 1447330f729Sjoerg for x in partitions_to_split: 1457330f729Sjoerg max_remaining_steps += (len(x) - 1) * 2 1467330f729Sjoerg 1477330f729Sjoerg picks = dict(all_a) 1487330f729Sjoerg for x in partition: 1497330f729Sjoerg picks[x] = choice_map[x][1] 1507330f729Sjoerg announce_test("checking %s [<=%d remaining]" % 1517330f729Sjoerg (format_namelist(partition), max_remaining_steps)) 1527330f729Sjoerg res = perform_test(picks) 1537330f729Sjoerg if res is True: 1547330f729Sjoerg known_good.update(partition) 1557330f729Sjoerg elif len(partition) > 1: 1567330f729Sjoerg partitions_to_split.insert(0, partition) 1577330f729Sjoerg 1587330f729Sjoerg # TODO: 1597330f729Sjoerg # - We could optimize based on the knowledge that when splitting a failed 1607330f729Sjoerg # partition into two and one side checks out okay then we can deduce that 1617330f729Sjoerg # the other partition must be a failure. 1627330f729Sjoerg all_choice_names = [name for name, _ in choices] 1637330f729Sjoerg partitions_to_split = [all_choice_names] 1647330f729Sjoerg while len(partitions_to_split) > 0: 1657330f729Sjoerg partition = partitions_to_split.pop() 1667330f729Sjoerg 1677330f729Sjoerg middle = len(partition) // 2 1687330f729Sjoerg left = partition[0:middle] 1697330f729Sjoerg right = partition[middle:] 1707330f729Sjoerg 1717330f729Sjoerg if len(left) > 0: 1727330f729Sjoerg test_partition(left, right) 1737330f729Sjoerg assert len(right) > 0 1747330f729Sjoerg test_partition(right, None) 1757330f729Sjoerg 1767330f729Sjoerg return known_good 1777330f729Sjoerg 1787330f729Sjoerg 1797330f729Sjoergdef extract_functions(file): 1807330f729Sjoerg functions = [] 1817330f729Sjoerg in_function = None 1827330f729Sjoerg for line in open(file): 1837330f729Sjoerg marker = line.find(" -- Begin function ") 1847330f729Sjoerg if marker != -1: 1857330f729Sjoerg if in_function is not None: 1867330f729Sjoerg warn("Missing end of function %s" % (in_function,)) 1877330f729Sjoerg funcname = line[marker + 19:-1] 1887330f729Sjoerg in_function = funcname 1897330f729Sjoerg text = line 1907330f729Sjoerg continue 1917330f729Sjoerg 1927330f729Sjoerg marker = line.find(" -- End function") 1937330f729Sjoerg if marker != -1: 1947330f729Sjoerg text += line 1957330f729Sjoerg functions.append((in_function, text)) 1967330f729Sjoerg in_function = None 1977330f729Sjoerg continue 1987330f729Sjoerg 1997330f729Sjoerg if in_function is not None: 2007330f729Sjoerg text += line 2017330f729Sjoerg return functions 2027330f729Sjoerg 2037330f729Sjoerg 2047330f729Sjoergdef replace_functions(source, dest, replacements): 2057330f729Sjoerg out = open(dest, "w") 2067330f729Sjoerg skip = False 2077330f729Sjoerg in_function = None 2087330f729Sjoerg for line in open(source): 2097330f729Sjoerg marker = line.find(" -- Begin function ") 2107330f729Sjoerg if marker != -1: 2117330f729Sjoerg if in_function is not None: 2127330f729Sjoerg warn("Missing end of function %s" % (in_function,)) 2137330f729Sjoerg funcname = line[marker + 19:-1] 2147330f729Sjoerg in_function = funcname 2157330f729Sjoerg replacement = replacements.get(in_function) 2167330f729Sjoerg if replacement is not None: 2177330f729Sjoerg out.write(replacement) 2187330f729Sjoerg skip = True 2197330f729Sjoerg else: 2207330f729Sjoerg marker = line.find(" -- End function") 2217330f729Sjoerg if marker != -1: 2227330f729Sjoerg in_function = None 2237330f729Sjoerg if skip: 2247330f729Sjoerg skip = False 2257330f729Sjoerg continue 2267330f729Sjoerg 2277330f729Sjoerg if not skip: 2287330f729Sjoerg out.write(line) 2297330f729Sjoerg 2307330f729Sjoerg 2317330f729Sjoergdef testrun(files): 2327330f729Sjoerg linkline = "%s %s" % (LINKTEST, " ".join(files),) 2337330f729Sjoerg res = subprocess.call(linkline, shell=True) 2347330f729Sjoerg if res != 0: 2357330f729Sjoerg announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST) 2367330f729Sjoerg return False 2377330f729Sjoerg else: 2387330f729Sjoerg announce_result("ok") 2397330f729Sjoerg return True 2407330f729Sjoerg 2417330f729Sjoerg 242*82d56013Sjoergdef prepare_files(gooddir, baddir, rspfile): 243*82d56013Sjoerg files_a = [] 244*82d56013Sjoerg files_b = [] 245*82d56013Sjoerg 246*82d56013Sjoerg if rspfile is not None: 247*82d56013Sjoerg def get_basename(name): 248*82d56013Sjoerg # remove prefix 249*82d56013Sjoerg if name.startswith(gooddir): 250*82d56013Sjoerg return name[len(gooddir):] 251*82d56013Sjoerg if name.startswith(baddir): 252*82d56013Sjoerg return name[len(baddir):] 253*82d56013Sjoerg assert False, "" 254*82d56013Sjoerg 255*82d56013Sjoerg with open(rspfile, "r") as rf: 256*82d56013Sjoerg for line in rf.read().splitlines(): 257*82d56013Sjoerg for obj in line.split(): 258*82d56013Sjoerg assert not os.path.isabs(obj), "TODO: support abs path" 259*82d56013Sjoerg files_a.append(gooddir + "/" + obj) 260*82d56013Sjoerg files_b.append(baddir + "/" + obj) 261*82d56013Sjoerg else: 262*82d56013Sjoerg get_basename = lambda name: os.path.basename(name) 2637330f729Sjoerg files_a = find(gooddir, "*") 2647330f729Sjoerg files_b = find(baddir, "*") 2657330f729Sjoerg 266*82d56013Sjoerg basenames_a = set(map(get_basename, files_a)) 267*82d56013Sjoerg basenames_b = set(map(get_basename, files_b)) 2687330f729Sjoerg 2697330f729Sjoerg for name in files_b: 270*82d56013Sjoerg basename = get_basename(name) 2717330f729Sjoerg if basename not in basenames_a: 2727330f729Sjoerg warn("There is no corresponding file to '%s' in %s" % 2737330f729Sjoerg (name, gooddir)) 2747330f729Sjoerg choices = [] 2757330f729Sjoerg skipped = [] 2767330f729Sjoerg for name in files_a: 277*82d56013Sjoerg basename = get_basename(name) 2787330f729Sjoerg if basename not in basenames_b: 2797330f729Sjoerg warn("There is no corresponding file to '%s' in %s" % 2807330f729Sjoerg (name, baddir)) 2817330f729Sjoerg 2827330f729Sjoerg file_a = gooddir + "/" + basename 2837330f729Sjoerg file_b = baddir + "/" + basename 2847330f729Sjoerg if filecmp.cmp(file_a, file_b): 2857330f729Sjoerg skipped.append(basename) 2867330f729Sjoerg continue 2877330f729Sjoerg 2887330f729Sjoerg choice = (basename, (file_a, file_b)) 2897330f729Sjoerg choices.append(choice) 2907330f729Sjoerg 2917330f729Sjoerg if len(skipped) > 0: 2927330f729Sjoerg info("Skipped (same content): %s" % format_namelist(skipped)) 2937330f729Sjoerg 2947330f729Sjoerg def perform_test(picks): 2957330f729Sjoerg files = [] 2967330f729Sjoerg # Note that we iterate over files_a so we don't change the order 2977330f729Sjoerg # (cannot use `picks` as it is a dictionary without order) 2987330f729Sjoerg for x in files_a: 299*82d56013Sjoerg basename = get_basename(x) 3007330f729Sjoerg picked = picks.get(basename) 3017330f729Sjoerg if picked is None: 3027330f729Sjoerg assert basename in skipped 3037330f729Sjoerg files.append(x) 3047330f729Sjoerg else: 3057330f729Sjoerg files.append(picked) 306*82d56013Sjoerg 307*82d56013Sjoerg # If response file is used, create a temporary response file for the 308*82d56013Sjoerg # picked files. 309*82d56013Sjoerg if rspfile is not None: 310*82d56013Sjoerg with tempfile.NamedTemporaryFile('w', suffix='.rsp', 311*82d56013Sjoerg delete=False) as tf: 312*82d56013Sjoerg tf.write(" ".join(files)) 313*82d56013Sjoerg tf.flush() 314*82d56013Sjoerg ret = testrun([tf.name]) 315*82d56013Sjoerg os.remove(tf.name) 316*82d56013Sjoerg return ret 317*82d56013Sjoerg 3187330f729Sjoerg return testrun(files) 3197330f729Sjoerg 3207330f729Sjoerg return perform_test, choices 3217330f729Sjoerg 3227330f729Sjoerg 3237330f729Sjoergdef prepare_functions(to_check, gooddir, goodfile, badfile): 3247330f729Sjoerg files_good = find(gooddir, "*") 3257330f729Sjoerg 3267330f729Sjoerg functions_a = extract_functions(goodfile) 3277330f729Sjoerg functions_a_map = dict(functions_a) 3287330f729Sjoerg functions_b_map = dict(extract_functions(badfile)) 3297330f729Sjoerg 3307330f729Sjoerg for name in functions_b_map.keys(): 3317330f729Sjoerg if name not in functions_a_map: 3327330f729Sjoerg warn("Function '%s' missing from good file" % name) 3337330f729Sjoerg choices = [] 3347330f729Sjoerg skipped = [] 3357330f729Sjoerg for name, candidate_a in functions_a: 3367330f729Sjoerg candidate_b = functions_b_map.get(name) 3377330f729Sjoerg if candidate_b is None: 3387330f729Sjoerg warn("Function '%s' missing from bad file" % name) 3397330f729Sjoerg continue 3407330f729Sjoerg if candidate_a == candidate_b: 3417330f729Sjoerg skipped.append(name) 3427330f729Sjoerg continue 3437330f729Sjoerg choice = name, (candidate_a, candidate_b) 3447330f729Sjoerg choices.append(choice) 3457330f729Sjoerg 3467330f729Sjoerg if len(skipped) > 0: 3477330f729Sjoerg info("Skipped (same content): %s" % format_namelist(skipped)) 3487330f729Sjoerg 3497330f729Sjoerg combined_file = '/tmp/combined2.s' 3507330f729Sjoerg files = [] 3517330f729Sjoerg found_good_file = False 3527330f729Sjoerg for c in files_good: 3537330f729Sjoerg if os.path.basename(c) == to_check: 3547330f729Sjoerg found_good_file = True 3557330f729Sjoerg files.append(combined_file) 3567330f729Sjoerg continue 3577330f729Sjoerg files.append(c) 3587330f729Sjoerg assert found_good_file 3597330f729Sjoerg 3607330f729Sjoerg def perform_test(picks): 3617330f729Sjoerg for name, x in picks.items(): 3627330f729Sjoerg assert x == functions_a_map[name] or x == functions_b_map[name] 3637330f729Sjoerg replace_functions(goodfile, combined_file, picks) 3647330f729Sjoerg return testrun(files) 3657330f729Sjoerg return perform_test, choices 3667330f729Sjoerg 3677330f729Sjoerg 3687330f729Sjoergdef main(): 3697330f729Sjoerg parser = argparse.ArgumentParser() 3707330f729Sjoerg parser.add_argument('--a', dest='dir_a', default='before') 3717330f729Sjoerg parser.add_argument('--b', dest='dir_b', default='after') 372*82d56013Sjoerg parser.add_argument('--rsp', default=None) 373*82d56013Sjoerg parser.add_argument('--test', default='./link_test') 3747330f729Sjoerg parser.add_argument('--insane', help='Skip sanity check', 3757330f729Sjoerg action='store_true') 3767330f729Sjoerg parser.add_argument('--seq', 3777330f729Sjoerg help='Check sequentially instead of bisection', 3787330f729Sjoerg action='store_true') 3797330f729Sjoerg parser.add_argument('file', metavar='file', nargs='?') 3807330f729Sjoerg config = parser.parse_args() 3817330f729Sjoerg 3827330f729Sjoerg gooddir = config.dir_a 3837330f729Sjoerg baddir = config.dir_b 384*82d56013Sjoerg rspfile = config.rsp 385*82d56013Sjoerg global LINKTEST 386*82d56013Sjoerg LINKTEST = config.test 3877330f729Sjoerg 3887330f729Sjoerg # Preparation phase: Creates a dictionary mapping names to a list of two 3897330f729Sjoerg # choices each. The bisection algorithm will pick one choice for each name 3907330f729Sjoerg # and then run the perform_test function on it. 3917330f729Sjoerg if config.file is not None: 3927330f729Sjoerg goodfile = gooddir + "/" + config.file 3937330f729Sjoerg badfile = baddir + "/" + config.file 3947330f729Sjoerg perform_test, choices = prepare_functions(config.file, gooddir, 3957330f729Sjoerg goodfile, badfile) 3967330f729Sjoerg else: 397*82d56013Sjoerg perform_test, choices = prepare_files(gooddir, baddir, rspfile) 3987330f729Sjoerg 3997330f729Sjoerg info("%d bisection choices" % len(choices)) 4007330f729Sjoerg 4017330f729Sjoerg # "Checking whether build environment is sane ..." 4027330f729Sjoerg if not config.insane: 4037330f729Sjoerg if not os.access(LINKTEST, os.X_OK): 4047330f729Sjoerg error("Expect '%s' to be present and executable" % (LINKTEST,)) 4057330f729Sjoerg exit(1) 4067330f729Sjoerg 4077330f729Sjoerg check_sanity(choices, perform_test) 4087330f729Sjoerg 4097330f729Sjoerg if config.seq: 4107330f729Sjoerg known_good = check_sequentially(choices, perform_test) 4117330f729Sjoerg else: 4127330f729Sjoerg known_good = check_bisect(choices, perform_test) 4137330f729Sjoerg 4147330f729Sjoerg stderr.write("") 4157330f729Sjoerg if len(known_good) != len(choices): 4167330f729Sjoerg stderr.write("== Failing ==\n") 4177330f729Sjoerg for name, _ in choices: 4187330f729Sjoerg if name not in known_good: 4197330f729Sjoerg stderr.write("%s\n" % name) 4207330f729Sjoerg else: 4217330f729Sjoerg # This shouldn't happen when the sanity check works... 4227330f729Sjoerg # Maybe link_test isn't deterministic? 4237330f729Sjoerg stderr.write("Could not identify failing parts?!?") 4247330f729Sjoerg 4257330f729Sjoerg 4267330f729Sjoergif __name__ == '__main__': 4277330f729Sjoerg main() 428