#!/usr/bin/env python
# Merge or print the coverage data collected by asan's coverage.
# Input files start with an 8-byte magic (two 32-bit words) that encodes the
# PC width, followed by a sequence of 32-bit or 64-bit integers (PCs).
# We need to merge these integers into a set and then
# either print them (as hex) or dump them into another file.
import array
import bisect
import glob
import os.path
import struct
import subprocess
import sys

try:
    from shlex import quote as _shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as _shell_quote  # Python 2

prog_name = ""


def Usage():
    """Write the command-line synopsis to stderr and exit with status 1."""
    commands = (
        "merge FILE [FILE...] > OUTPUT",
        "print FILE [FILE...]",
        "unpack FILE [FILE...]",
        "rawunpack FILE [FILE ...]",
        "missing BINARY < LIST_OF_PCS",
    )
    sys.stderr.write(
        "Usage: \n" +
        "".join(" %s %s\n" % (prog_name, command) for command in commands) +
        "\n")
    # sys.exit rather than the bare exit() helper, which is only installed by
    # the site module and is missing under e.g. `python -S`.
    sys.exit(1)


def CheckBits(bits):
    """Raise Exception unless bits is 32 or 64."""
    if bits != 32 and bits != 64:
        raise Exception("Wrong bitness: %d" % bits)


def TypeCodeForBits(bits):
    """Return the array-module type code used for PCs of the given width.

    NOTE(review): array's 'L' is only guaranteed to be at least 4 bytes wide;
    whether it is 8 bytes depends on the platform. Not called in this file.
    """
    CheckBits(bits)
    return 'L' if bits == 64 else 'I'


def TypeCodeForStruct(bits):
    """Return the struct-module format character for PCs of the given width."""
    CheckBits(bits)
    return 'Q' if bits == 64 else 'I'


def _StructFormat(bits, count):
    """Return a native struct format describing `count` PCs of width `bits`."""
    # A repeat count ('3Q') means exactly the same as 'QQQ' but avoids
    # building a format string as long as the PC list itself.
    return '%d%s' % (count, TypeCodeForStruct(bits))


kMagic32SecondHalf = 0xFFFFFF32
kMagic64SecondHalf = 0xFFFFFF64
kMagicFirstHalf = 0xC0BFFFFF


def MagicForBits(bits):
    """Return the two 32-bit magic words, in on-disk order for this host.

    The order of the two halves is swapped on little-endian hosts.
    """
    CheckBits(bits)
    second_half = kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf
    if sys.byteorder == 'little':
        return [second_half, kMagicFirstHalf]
    else:
        return [kMagicFirstHalf, second_half]


def ReadMagicAndReturnBitness(f, path):
    """Read the 8-byte magic from f and return the PC width (32 or 64).

    `path` is used only for the error message. Raises Exception when the magic
    does not match either known value.
    """
    magic_bytes = f.read(8)
    magic_words = struct.unpack('II', magic_bytes)
    bits = 0
    # Mirrors MagicForBits: the first half sits at index 1 on little-endian.
    idx = 1 if sys.byteorder == 'little' else 0
    if magic_words[idx] == kMagicFirstHalf:
        if magic_words[1 - idx] == kMagic64SecondHalf:
            bits = 64
        elif magic_words[1 - idx] == kMagic32SecondHalf:
            bits = 32
    if bits == 0:
        raise Exception('Bad magic word in %s' % path)
    return bits


def ReadOneFile(path):
    """Return the PCs stored in the coverage file at path, in file order.

    Raises Exception when the file is shorter than the magic or the magic is
    not recognized. Trailing bytes that do not form a whole PC are ignored.
    """
    with open(path, mode="rb") as f:
        f.seek(0, 2)
        size = f.tell()
        f.seek(0, 0)
        if size < 8:
            raise Exception('File %s is short (< 8 bytes)' % path)
        bits = ReadMagicAndReturnBitness(f, path)
        size -= 8
        w = size * 8 // bits
        s = struct.unpack_from(_StructFormat(bits, w), f.read(size))
    sys.stderr.write(
        "%s: read %d %d-bit PCs from %s\n" % (prog_name, w, bits, path))
    return s


def Merge(files):
    """Return the sorted list of distinct PCs found in all of `files`."""
    s = set()
    for f in files:
        # In-place update; union() would copy the accumulated set per file.
        s.update(ReadOneFile(f))
    sys.stderr.write(
        "%s: %d files merged; %d PCs total\n" % (prog_name, len(files), len(s))
    )
    return sorted(s)


def PrintFiles(files):
    """Print the PCs from `files` to stdout as hex, one per line."""
    if len(files) > 1:
        s = Merge(files)
    else:  # If there is just one file, print the PCs in order.
        s = ReadOneFile(files[0])
        sys.stderr.write(
            "%s: 1 file merged; %d PCs total\n" % (prog_name, len(s)))
    for i in s:
        print("0x%x" % i)


def MergeAndPrint(files):
    """Merge `files` and write the result in binary form to stdout.

    Refuses to write binary data to a terminal (prints usage and exits). The
    output is 64-bit only when some PC does not fit into 32 bits.
    """
    if sys.stdout.isatty():
        Usage()
    s = Merge(files)
    bits = 32
    # s is sorted, so its last element is the maximum. Guard against an empty
    # merge result, for which max() would raise ValueError.
    if s and s[-1] > 0xFFFFFFFF:
        bits = 64
    # Python 3 needs the underlying binary buffer; Python 2 writes directly.
    stdout_buf = getattr(sys.stdout, 'buffer', sys.stdout)
    array.array('I', MagicForBits(bits)).tofile(stdout_buf)
    a = struct.pack(_StructFormat(bits, len(s)), *s)
    stdout_buf.write(a)


def UnpackOneFile(path):
    """Split the packed file at path into per-module/per-pid .sancov files.

    Each record is a 12-byte header (pid, module name length, blob size)
    followed by the module name and the blob. Raises Exception when a record
    is truncated.
    """
    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
        while True:
            header = f.read(12)
            if not header:
                return
            if len(header) < 12:
                break
            pid, module_length, blob_size = struct.unpack('iII', header)
            module_bytes = f.read(module_length)
            blob = f.read(blob_size)
            # Compare byte counts before decoding: the decoded name can have
            # fewer characters than bytes when it is not pure ASCII.
            if len(module_bytes) != module_length or len(blob) != blob_size:
                break
            module = module_bytes.decode('utf-8')
            extracted_file = "%s.%d.sancov" % (module, pid)
            sys.stderr.write(
                "%s: extracting %s\n" % (prog_name, extracted_file))
            # The packed file may contain multiple blobs for the same
            # pid/module pair. Append to the end of the file instead of
            # overwriting.
            with open(extracted_file, 'ab') as f2:
                f2.write(blob)
    # fail
    raise Exception('Error reading file %s' % path)


def Unpack(files):
    """Unpack every packed file in `files`."""
    for f in files:
        UnpackOneFile(f)


def UnpackOneRawFile(path, map_path):
    """Convert a raw PC dump into per-module .sancov files.

    map_path is a text file whose first line is the PC width (32 or 64) and
    whose remaining lines are `start end base module_path` with hex numbers.
    path holds raw PCs of that width. PCs equal to zero are skipped, PCs
    outside every mapping produce a warning, and the rest are written relative
    to their module's base into `<module_path>.<basename of path minus .raw>`.
    """
    mem_map = []
    with open(map_path, mode="rt") as f_map:
        sys.stderr.write("%s: reading map %s\n" % (prog_name, map_path))
        bits = int(f_map.readline())
        if bits != 32 and bits != 64:
            raise Exception('Wrong bits size in the map')
        for line in f_map:
            parts = line.rstrip().split()
            mem_map.append((int(parts[0], 16),
                            int(parts[1], 16),
                            int(parts[2], 16),
                            ' '.join(parts[3:])))
    mem_map.sort(key=lambda m: m[0])
    mem_map_keys = [m[0] for m in mem_map]

    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))

        f.seek(0, 2)
        size = f.tell()
        f.seek(0, 0)
        pcs = struct.unpack_from(
            _StructFormat(bits, size * 8 // bits), f.read(size))
        mem_map_pcs = [[] for _ in mem_map]

        for pc in pcs:
            if pc == 0:
                continue
            # Index of the last mapping whose start is <= pc; -1 when pc lies
            # below every mapping (or the map is empty). That case must not
            # wrap around to the last mapping via negative indexing.
            map_idx = bisect.bisect(mem_map_keys, pc) - 1
            if map_idx < 0 or pc >= mem_map[map_idx][1]:
                sys.stderr.write(
                    "warning: %s: pc %x outside of any known mapping\n"
                    % (prog_name, pc))
                continue
            base = mem_map[map_idx][2]
            mem_map_pcs[map_idx].append(pc - base)

        for ((start, end, base, module_path), pc_list) in zip(mem_map,
                                                              mem_map_pcs):
            if len(pc_list) == 0:
                continue
            assert path.endswith('.sancov.raw')
            # Strip the trailing ".raw" to get "<name>.sancov".
            dst_path = module_path + '.' + os.path.basename(path)[:-4]
            sys.stderr.write(
                "%s: writing %d PCs to %s\n"
                % (prog_name, len(pc_list), dst_path))
            sorted_pc_list = sorted(pc_list)
            pc_buffer = struct.pack(
                _StructFormat(bits, len(pc_list)), *sorted_pc_list)
            # NOTE(review): the file is opened for append and the magic is
            # written on every call, so a pre-existing dst_path ends up with
            # a second magic in the middle - confirm this is intended.
            with open(dst_path, 'ab+') as f2:
                array.array('I', MagicForBits(bits)).tofile(f2)
                f2.seek(0, 2)
                f2.write(pc_buffer)


def RawUnpack(files):
    """Unpack each `*.sancov.raw` file using its sibling `*.sancov.map`."""
    for f in files:
        if not f.endswith('.sancov.raw'):
            raise Exception('Unexpected raw file name %s' % f)
        f_map = f[:-3] + 'map'
        UnpackOneRawFile(f, f_map)


def GetInstrumentedPCs(binary):
    """Return the set of instrumented PCs found by disassembling `binary`."""
    # This looks scary, but all it does is extract all offsets where we call:
    # - __sanitizer_cov() or __sanitizer_cov_with_check(),
    # - with call or callq,
    # - directly or via PLT.
    # The path is shell-quoted because the pipeline runs with shell=True.
    cmd = r"objdump --no-show-raw-insn -d %s | " \
          r"grep '^\s\+[0-9a-f]\+:\s\+call\(q\|\)\s\+\(0x\|\)[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\|_trace_pc_guard\)\(@plt\|\)>' | " \
          r"grep -o '^\s\+[0-9a-f]\+'" % _shell_quote(binary)
    # stdin is redirected so the pipeline cannot consume our own stdin, which
    # PrintMissing reads afterwards.
    lines = subprocess.check_output(
        cmd, stdin=subprocess.PIPE, shell=True).splitlines()
    # The PCs we get from objdump are off by 4 bytes, as they point to the
    # beginning of the callq instruction. Empirically this is true on x86 and
    # x86_64.
    return set(int(line.strip(), 16) + 4 for line in lines)


def PrintMissing(binary):
    """Print instrumented PCs of `binary` that are absent from stdin.

    stdin holds one hex PC per line; blank lines are ignored. Raises Exception
    when `binary` is not a file.
    """
    if not os.path.isfile(binary):
        raise Exception('File not found: %s' % binary)
    instrumented = GetInstrumentedPCs(binary)
    sys.stderr.write("%s: found %d instrumented PCs in %s\n" % (prog_name,
                                                                len(instrumented),
                                                                binary))
    covered = set(int(line, 16) for line in sys.stdin if line.strip())
    sys.stderr.write(
        "%s: read %d PCs from stdin\n" % (prog_name, len(covered)))
    missing = instrumented - covered
    sys.stderr.write(
        "%s: %d PCs missing from coverage\n" % (prog_name, len(missing)))
    # True exactly when some covered PC is not an instrumented PC.
    if (len(missing) > len(instrumented) - len(covered)):
        sys.stderr.write(
            "%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
        )
    for pc in sorted(missing):
        print("0x%x" % pc)


if __name__ == '__main__':
    prog_name = sys.argv[0]
    if len(sys.argv) <= 2:
        Usage()

    if sys.argv[1] == "missing":
        if len(sys.argv) != 3:
            Usage()
        PrintMissing(sys.argv[2])
        sys.exit(0)

    file_list = []
    for f in sys.argv[2:]:
        file_list += glob.glob(f)
    if not file_list:
        Usage()

    if sys.argv[1] == "print":
        PrintFiles(file_list)
    elif sys.argv[1] == "merge":
        MergeAndPrint(file_list)
    elif sys.argv[1] == "unpack":
        Unpack(file_list)
    elif sys.argv[1] == "rawunpack":
        RawUnpack(file_list)
    else:
        Usage()