1#!/usr/bin/env python 2# 3# Given a previous good compile narrow down miscompiles. 4# Expects two directories named "before" and "after" each containing a set of 5# assembly or object files where the "after" version is assumed to be broken. 6# You also have to provide a script called "link_test". It is called with a 7# list of files which should be linked together and result tested. "link_test" 8# should returns with exitcode 0 if the linking and testing succeeded. 9# 10# If a response file is provided, only the object files that are listed in the 11# file are inspected. In addition, the "link_test" is called with a temporary 12# response file representing one iteration of bisection. 13# 14# abtest.py operates by taking all files from the "before" directory and 15# in each step replacing one of them with a file from the "bad" directory. 16# 17# Additionally you can perform the same steps with a single .s file. In this 18# mode functions are identified by " -- Begin function FunctionName" and 19# " -- End function" markers. The abtest.py then takes all 20# function from the file in the "before" directory and replaces one function 21# with the corresponding function from the "bad" file in each step. 22# 23# Example usage to identify miscompiled files: 24# 1. Create a link_test script, make it executable. Simple Example: 25# clang "$@" -o /tmp/test && /tmp/test || echo "PROBLEM" 26# 2. Run the script to figure out which files are miscompiled: 27# > ./abtest.py 28# somefile.s: ok 29# someotherfile.s: skipped: same content 30# anotherfile.s: failed: './link_test' exitcode != 0 31# ... 32# Example usage to identify miscompiled functions inside a file: 33# 3. Run the tests on a single file (assuming before/file.s and 34# after/file.s exist) 35# > ./abtest.py file.s 36# funcname1 [0/XX]: ok 37# funcname2 [1/XX]: ok 38# funcname3 [2/XX]: skipped: same content 39# funcname4 [3/XX]: failed: './link_test' exitcode != 0 40# ... 41from fnmatch import filter 42from sys import stderr 43import argparse 44import filecmp 45import os 46import subprocess 47import sys 48import tempfile 49 50# Specify LINKTEST via `--test`. Default value is './link_test'. 51LINKTEST = "" 52ESCAPE = "\033[%sm" 53BOLD = ESCAPE % "1" 54RED = ESCAPE % "31" 55NORMAL = ESCAPE % "0" 56FAILED = RED + "failed" + NORMAL 57 58 59def find(dir, file_filter=None): 60 files = [walkdir[0] + "/" + file for walkdir in os.walk(dir) for file in walkdir[2]] 61 if file_filter is not None: 62 files = filter(files, file_filter) 63 return sorted(files) 64 65 66def error(message): 67 stderr.write("Error: %s\n" % (message,)) 68 69 70def warn(message): 71 stderr.write("Warning: %s\n" % (message,)) 72 73 74def info(message): 75 stderr.write("Info: %s\n" % (message,)) 76 77 78def announce_test(name): 79 stderr.write("%s%s%s: " % (BOLD, name, NORMAL)) 80 stderr.flush() 81 82 83def announce_result(result): 84 stderr.write(result) 85 stderr.write("\n") 86 stderr.flush() 87 88 89def format_namelist(l): 90 result = ", ".join(l[0:3]) 91 if len(l) > 3: 92 result += "... (%d total)" % len(l) 93 return result 94 95 96def check_sanity(choices, perform_test): 97 announce_test("sanity check A") 98 all_a = {name: a_b[0] for name, a_b in choices} 99 res_a = perform_test(all_a) 100 if res_a is not True: 101 error("Picking all choices from A failed to pass the test") 102 sys.exit(1) 103 104 announce_test("sanity check B (expecting failure)") 105 all_b = {name: a_b[1] for name, a_b in choices} 106 res_b = perform_test(all_b) 107 if res_b is not False: 108 error("Picking all choices from B did unexpectedly pass the test") 109 sys.exit(1) 110 111 112def check_sequentially(choices, perform_test): 113 known_good = set() 114 all_a = {name: a_b[0] for name, a_b in choices} 115 n = 1 116 for name, a_b in sorted(choices): 117 picks = dict(all_a) 118 picks[name] = a_b[1] 119 announce_test("checking %s [%d/%d]" % (name, n, len(choices))) 120 n += 1 121 res = perform_test(picks) 122 if res is True: 123 known_good.add(name) 124 return known_good 125 126 127def check_bisect(choices, perform_test): 128 known_good = set() 129 if len(choices) == 0: 130 return known_good 131 132 choice_map = dict(choices) 133 all_a = {name: a_b[0] for name, a_b in choices} 134 135 def test_partition(partition, upcoming_partition): 136 # Compute the maximum number of checks we have to do in the worst case. 137 max_remaining_steps = len(partition) * 2 - 1 138 if upcoming_partition is not None: 139 max_remaining_steps += len(upcoming_partition) * 2 - 1 140 for x in partitions_to_split: 141 max_remaining_steps += (len(x) - 1) * 2 142 143 picks = dict(all_a) 144 for x in partition: 145 picks[x] = choice_map[x][1] 146 announce_test( 147 "checking %s [<=%d remaining]" 148 % (format_namelist(partition), max_remaining_steps) 149 ) 150 res = perform_test(picks) 151 if res is True: 152 known_good.update(partition) 153 elif len(partition) > 1: 154 partitions_to_split.insert(0, partition) 155 156 # TODO: 157 # - We could optimize based on the knowledge that when splitting a failed 158 # partition into two and one side checks out okay then we can deduce that 159 # the other partition must be a failure. 160 all_choice_names = [name for name, _ in choices] 161 partitions_to_split = [all_choice_names] 162 while len(partitions_to_split) > 0: 163 partition = partitions_to_split.pop() 164 165 middle = len(partition) // 2 166 left = partition[0:middle] 167 right = partition[middle:] 168 169 if len(left) > 0: 170 test_partition(left, right) 171 assert len(right) > 0 172 test_partition(right, None) 173 174 return known_good 175 176 177def extract_functions(file): 178 functions = [] 179 in_function = None 180 for line in open(file): 181 marker = line.find(" -- Begin function ") 182 if marker != -1: 183 if in_function is not None: 184 warn("Missing end of function %s" % (in_function,)) 185 funcname = line[marker + 19 : -1] 186 in_function = funcname 187 text = line 188 continue 189 190 marker = line.find(" -- End function") 191 if marker != -1: 192 text += line 193 functions.append((in_function, text)) 194 in_function = None 195 continue 196 197 if in_function is not None: 198 text += line 199 return functions 200 201 202def replace_functions(source, dest, replacements): 203 out = open(dest, "w") 204 skip = False 205 in_function = None 206 for line in open(source): 207 marker = line.find(" -- Begin function ") 208 if marker != -1: 209 if in_function is not None: 210 warn("Missing end of function %s" % (in_function,)) 211 funcname = line[marker + 19 : -1] 212 in_function = funcname 213 replacement = replacements.get(in_function) 214 if replacement is not None: 215 out.write(replacement) 216 skip = True 217 else: 218 marker = line.find(" -- End function") 219 if marker != -1: 220 in_function = None 221 if skip: 222 skip = False 223 continue 224 225 if not skip: 226 out.write(line) 227 228 229def testrun(files): 230 linkline = "%s %s" % ( 231 LINKTEST, 232 " ".join(files), 233 ) 234 res = subprocess.call(linkline, shell=True) 235 if res != 0: 236 announce_result(FAILED + ": '%s' exitcode != 0" % LINKTEST) 237 return False 238 else: 239 announce_result("ok") 240 return True 241 242 243def prepare_files(gooddir, baddir, rspfile): 244 files_a = [] 245 files_b = [] 246 247 if rspfile is not None: 248 249 def get_basename(name): 250 # remove prefix 251 if name.startswith(gooddir): 252 return name[len(gooddir) :] 253 if name.startswith(baddir): 254 return name[len(baddir) :] 255 assert False, "" 256 257 with open(rspfile, "r") as rf: 258 for line in rf.read().splitlines(): 259 for obj in line.split(): 260 assert not os.path.isabs(obj), "TODO: support abs path" 261 files_a.append(gooddir + "/" + obj) 262 files_b.append(baddir + "/" + obj) 263 else: 264 get_basename = lambda name: os.path.basename(name) 265 files_a = find(gooddir, "*") 266 files_b = find(baddir, "*") 267 268 basenames_a = set(map(get_basename, files_a)) 269 basenames_b = set(map(get_basename, files_b)) 270 271 for name in files_b: 272 basename = get_basename(name) 273 if basename not in basenames_a: 274 warn("There is no corresponding file to '%s' in %s" % (name, gooddir)) 275 choices = [] 276 skipped = [] 277 for name in files_a: 278 basename = get_basename(name) 279 if basename not in basenames_b: 280 warn("There is no corresponding file to '%s' in %s" % (name, baddir)) 281 282 file_a = gooddir + "/" + basename 283 file_b = baddir + "/" + basename 284 if filecmp.cmp(file_a, file_b): 285 skipped.append(basename) 286 continue 287 288 choice = (basename, (file_a, file_b)) 289 choices.append(choice) 290 291 if len(skipped) > 0: 292 info("Skipped (same content): %s" % format_namelist(skipped)) 293 294 def perform_test(picks): 295 files = [] 296 # Note that we iterate over files_a so we don't change the order 297 # (cannot use `picks` as it is a dictionary without order) 298 for x in files_a: 299 basename = get_basename(x) 300 picked = picks.get(basename) 301 if picked is None: 302 assert basename in skipped 303 files.append(x) 304 else: 305 files.append(picked) 306 307 # If response file is used, create a temporary response file for the 308 # picked files. 309 if rspfile is not None: 310 with tempfile.NamedTemporaryFile("w", suffix=".rsp", delete=False) as tf: 311 tf.write(" ".join(files)) 312 tf.flush() 313 ret = testrun([tf.name]) 314 os.remove(tf.name) 315 return ret 316 317 return testrun(files) 318 319 return perform_test, choices 320 321 322def prepare_functions(to_check, gooddir, goodfile, badfile): 323 files_good = find(gooddir, "*") 324 325 functions_a = extract_functions(goodfile) 326 functions_a_map = dict(functions_a) 327 functions_b_map = dict(extract_functions(badfile)) 328 329 for name in functions_b_map.keys(): 330 if name not in functions_a_map: 331 warn("Function '%s' missing from good file" % name) 332 choices = [] 333 skipped = [] 334 for name, candidate_a in functions_a: 335 candidate_b = functions_b_map.get(name) 336 if candidate_b is None: 337 warn("Function '%s' missing from bad file" % name) 338 continue 339 if candidate_a == candidate_b: 340 skipped.append(name) 341 continue 342 choice = name, (candidate_a, candidate_b) 343 choices.append(choice) 344 345 if len(skipped) > 0: 346 info("Skipped (same content): %s" % format_namelist(skipped)) 347 348 combined_file = "/tmp/combined2.s" 349 files = [] 350 found_good_file = False 351 for c in files_good: 352 if os.path.basename(c) == to_check: 353 found_good_file = True 354 files.append(combined_file) 355 continue 356 files.append(c) 357 assert found_good_file 358 359 def perform_test(picks): 360 for name, x in picks.items(): 361 assert x == functions_a_map[name] or x == functions_b_map[name] 362 replace_functions(goodfile, combined_file, picks) 363 return testrun(files) 364 365 return perform_test, choices 366 367 368def main(): 369 parser = argparse.ArgumentParser() 370 parser.add_argument("--a", dest="dir_a", default="before") 371 parser.add_argument("--b", dest="dir_b", default="after") 372 parser.add_argument("--rsp", default=None) 373 parser.add_argument("--test", default="./link_test") 374 parser.add_argument("--insane", help="Skip sanity check", action="store_true") 375 parser.add_argument( 376 "--seq", help="Check sequentially instead of bisection", action="store_true" 377 ) 378 parser.add_argument("file", metavar="file", nargs="?") 379 config = parser.parse_args() 380 381 gooddir = config.dir_a 382 baddir = config.dir_b 383 rspfile = config.rsp 384 global LINKTEST 385 LINKTEST = config.test 386 387 # Preparation phase: Creates a dictionary mapping names to a list of two 388 # choices each. The bisection algorithm will pick one choice for each name 389 # and then run the perform_test function on it. 390 if config.file is not None: 391 goodfile = gooddir + "/" + config.file 392 badfile = baddir + "/" + config.file 393 perform_test, choices = prepare_functions( 394 config.file, gooddir, goodfile, badfile 395 ) 396 else: 397 perform_test, choices = prepare_files(gooddir, baddir, rspfile) 398 399 info("%d bisection choices" % len(choices)) 400 401 # "Checking whether build environment is sane ..." 402 if not config.insane: 403 if not os.access(LINKTEST, os.X_OK): 404 error("Expect '%s' to be present and executable" % (LINKTEST,)) 405 exit(1) 406 407 check_sanity(choices, perform_test) 408 409 if config.seq: 410 known_good = check_sequentially(choices, perform_test) 411 else: 412 known_good = check_bisect(choices, perform_test) 413 414 stderr.write("") 415 if len(known_good) != len(choices): 416 stderr.write("== Failing ==\n") 417 for name, _ in choices: 418 if name not in known_good: 419 stderr.write("%s\n" % name) 420 else: 421 # This shouldn't happen when the sanity check works... 422 # Maybe link_test isn't deterministic? 423 stderr.write("Could not identify failing parts?!?") 424 425 426if __name__ == "__main__": 427 main() 428