#!/usr/bin/env python
# Merge or print the coverage data collected by asan's coverage.
# Input files start with an 8-byte magic that encodes the PC width, followed
# by a sequence of 32-bit or 64-bit integers (the covered PCs).
# We need to merge these integers into a set and then
# either print them (as hex) or dump them into another file.
import array
import bisect
import glob
import os.path
import struct
import subprocess
import sys

try:
    from shlex import quote as _shell_quote  # Python 3.3+
except ImportError:  # Python 2 fallback
    from pipes import quote as _shell_quote

prog_name = ""


def Usage():
    """Write the command-line synopsis to stderr and exit with status 1."""
    sys.stderr.write(
        "Usage: \n" + " " + prog_name + " merge FILE [FILE...] > OUTPUT\n"
        " " + prog_name + " print FILE [FILE...]\n"
        " " + prog_name + " unpack FILE [FILE...]\n"
        " " + prog_name + " rawunpack FILE [FILE ...]\n"
        " " + prog_name + " missing BINARY < LIST_OF_PCS\n"
        "\n"
    )
    # sys.exit rather than the site-provided exit(), which is not available
    # when the interpreter runs without the site module.
    sys.exit(1)


def CheckBits(bits):
    """Raise Exception unless `bits` is 32 or 64."""
    if bits != 32 and bits != 64:
        raise Exception("Wrong bitness: %d" % bits)


def TypeCodeForBits(bits):
    """Return the `array` type code used for PCs of the given width.

    NOTE(review): "L" is a C unsigned long, whose size is platform
    dependent; this helper is not used elsewhere in this file.
    """
    CheckBits(bits)
    return "L" if bits == 64 else "I"


def TypeCodeForStruct(bits):
    """Return the `struct` format character for one PC of the given width."""
    CheckBits(bits)
    return "Q" if bits == 64 else "I"


kMagic32SecondHalf = 0xFFFFFF32
kMagic64SecondHalf = 0xFFFFFF64
kMagicFirstHalf = 0xC0BFFFFF


def MagicForBits(bits):
    """Return the file magic as two 32-bit words in native memory order."""
    CheckBits(bits)
    second_half = kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf
    if sys.byteorder == "little":
        return [second_half, kMagicFirstHalf]
    return [kMagicFirstHalf, second_half]


def ReadMagicAndReturnBitness(f, path):
    """Consume the 8-byte magic from `f` and return the PC width (32 or 64).

    `path` is only used in the error message. Raises Exception when the
    magic is not recognised.
    """
    magic_words = struct.unpack("II", f.read(8))
    # Index of the word holding kMagicFirstHalf; mirrors MagicForBits().
    first_idx = 1 if sys.byteorder == "little" else 0
    bits = 0
    if magic_words[first_idx] == kMagicFirstHalf:
        second_half = magic_words[1 - first_idx]
        if second_half == kMagic64SecondHalf:
            bits = 64
        elif second_half == kMagic32SecondHalf:
            bits = 32
    if bits == 0:
        raise Exception("Bad magic word in %s" % path)
    return bits


def ReadOneFile(path):
    """Read one coverage file and return its PCs as a tuple, in file order.

    Raises Exception when the file is shorter than the magic or the magic
    is invalid. Trailing bytes that do not form a whole PC are ignored.
    """
    with open(path, mode="rb") as f:
        f.seek(0, 2)
        size = f.tell()
        f.seek(0, 0)
        if size < 8:
            raise Exception("File %s is short (< 8 bytes)" % path)
        bits = ReadMagicAndReturnBitness(f, path)
        size -= 8
        w = size * 8 // bits
        s = struct.unpack_from(TypeCodeForStruct(bits) * w, f.read(size))
    sys.stderr.write("%s: read %d %d-bit PCs from %s\n" % (prog_name, w, bits, path))
    return s


def Merge(files):
    """Return the sorted list of distinct PCs found in all of `files`."""
    s = set()
    for f in files:
        # In-place update avoids copying the accumulated set for every file.
        s.update(ReadOneFile(f))
    sys.stderr.write(
        "%s: %d files merged; %d PCs total\n" % (prog_name, len(files), len(s))
    )
    return sorted(s)


def PrintFiles(files):
    """Print the PCs from `files` to stdout, one hex value per line."""
    if len(files) > 1:
        s = Merge(files)
    else:  # If there is just one file, print the PCs in order.
        s = ReadOneFile(files[0])
        sys.stderr.write("%s: 1 file merged; %d PCs total\n" % (prog_name, len(s)))
    for i in s:
        print("0x%x" % i)


def MergeAndPrint(files):
    """Merge `files` and write the result to stdout in coverage-file format.

    Refuses to write binary data to a terminal (prints usage and exits).
    The output is 64-bit only if some PC does not fit in 32 bits.
    """
    if sys.stdout.isatty():
        Usage()
    s = Merge(files)
    bits = 32
    # `s` is sorted, so its last element is the maximum. The emptiness check
    # keeps an empty merge from crashing; it produces a header-only file.
    if s and s[-1] > 0xFFFFFFFF:
        bits = 64
    # Python 3 needs the underlying binary stream; Python 2 stdout is binary.
    stdout_buf = getattr(sys.stdout, "buffer", sys.stdout)
    array.array("I", MagicForBits(bits)).tofile(stdout_buf)
    a = struct.pack(TypeCodeForStruct(bits) * len(s), *s)
    stdout_buf.write(a)


def UnpackOneFile(path):
    """Split a packed coverage file into per-module, per-pid .sancov files.

    Each record is a 12-byte header (pid, module name length, blob size)
    followed by the module name and the blob. Raises Exception when the
    file ends in the middle of a record.
    """
    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
        while True:
            header = f.read(12)
            if not header:
                return  # Clean end of file.
            if len(header) < 12:
                break
            pid, module_length, blob_size = struct.unpack("iII", header)
            module_bytes = f.read(module_length)
            blob = f.read(blob_size)
            # Compare byte counts before decoding: the header stores the
            # length in bytes, which differs from the character count for
            # non-ASCII module paths.
            if len(module_bytes) != module_length or len(blob) != blob_size:
                break
            module = module_bytes.decode("utf-8")
            extracted_file = "%s.%d.sancov" % (module, pid)
            sys.stderr.write("%s: extracting %s\n" % (prog_name, extracted_file))
            # The packed file may contain multiple blobs for the same pid/module
            # pair. Append to the end of the file instead of overwriting.
            with open(extracted_file, "ab") as f2:
                f2.write(blob)
    # Only reached when a record was truncated.
    raise Exception("Error reading file %s" % path)


def Unpack(files):
    """Unpack every packed coverage file in `files`."""
    for f in files:
        UnpackOneFile(f)


def UnpackOneRawFile(path, map_path):
    """Convert a raw PC dump plus its memory map into per-module files.

    The map file starts with a line holding the PC width (32 or 64); each
    following line is "START END BASE MODULE_PATH" with hex addresses. Every
    non-zero PC in `path` is attributed to the mapping containing it and
    stored as an offset from that mapping's BASE. PCs that fall outside every
    mapping are reported on stderr and skipped.
    """
    mem_map = []
    with open(map_path, mode="rt") as f_map:
        sys.stderr.write("%s: reading map %s\n" % (prog_name, map_path))
        bits = int(f_map.readline())
        if bits != 32 and bits != 64:
            raise Exception("Wrong bits size in the map")
        for line in f_map:
            parts = line.rstrip().split()
            if not parts:
                continue  # Tolerate blank lines.
            mem_map.append(
                (
                    int(parts[0], 16),
                    int(parts[1], 16),
                    int(parts[2], 16),
                    " ".join(parts[3:]),
                )
            )
    mem_map.sort(key=lambda m: m[0])
    mem_map_keys = [m[0] for m in mem_map]

    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
        data = f.read()
    pcs = struct.unpack_from(
        TypeCodeForStruct(bits) * (len(data) * 8 // bits), data
    )

    mem_map_pcs = [[] for _ in mem_map]
    for pc in pcs:
        if pc == 0:
            continue
        # Index of the last mapping whose start is <= pc, or -1 if pc lies
        # below every mapping (or the map is empty).
        map_idx = bisect.bisect(mem_map_keys, pc) - 1
        if map_idx < 0 or pc >= mem_map[map_idx][1]:
            sys.stderr.write(
                "warning: %s: pc %x outside of any known mapping\n"
                % (prog_name, pc)
            )
            continue
        mem_map_pcs[map_idx].append(pc - mem_map[map_idx][2])

    for (start, end, base, module_path), pc_list in zip(mem_map, mem_map_pcs):
        if not pc_list:
            continue
        if not path.endswith(".sancov.raw"):
            raise Exception("Unexpected raw file name %s" % path)
        # Drop the trailing ".raw" so the output ends in ".sancov".
        dst_path = module_path + "." + os.path.basename(path)[:-4]
        sys.stderr.write(
            "%s: writing %d PCs to %s\n" % (prog_name, len(pc_list), dst_path)
        )
        pc_buffer = struct.pack(
            TypeCodeForStruct(bits) * len(pc_list), *sorted(pc_list)
        )
        with open(dst_path, "ab+") as f2:
            # Write the magic only when starting a new file; appending it to
            # existing content would plant a bogus header in the middle.
            f2.seek(0, 2)
            if f2.tell() == 0:
                array.array("I", MagicForBits(bits)).tofile(f2)
            f2.write(pc_buffer)


def RawUnpack(files):
    """Run UnpackOneRawFile on each *.sancov.raw file and its sibling map."""
    for f in files:
        if not f.endswith(".sancov.raw"):
            raise Exception("Unexpected raw file name %s" % f)
        f_map = f[:-3] + "map"
        UnpackOneRawFile(f, f_map)


def GetInstrumentedPCs(binary):
    """Return the set of instrumented PCs found by disassembling `binary`."""
    # This looks scary, but all it does is extract all offsets where we call:
    # - __sanitizer_cov(), __sanitizer_cov_with_check() or
    #   __sanitizer_cov_trace_pc_guard(),
    # - with call or callq,
    # - directly or via PLT.
    # The binary path is shell-quoted because the pipeline runs via the shell.
    cmd = (
        r"objdump --no-show-raw-insn -d %s | "
        r"grep '^\s\+[0-9a-f]\+:\s\+call\(q\|\)\s\+\(0x\|\)[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\|_trace_pc_guard\)\(@plt\|\)>' | "
        r"grep -o '^\s\+[0-9a-f]\+'" % _shell_quote(binary)
    )
    # stdin is redirected so the pipeline cannot consume this process's
    # stdin, which PrintMissing reads afterwards.
    lines = subprocess.check_output(cmd, stdin=subprocess.PIPE, shell=True).splitlines()
    # The PCs we get from objdump are off by 4 bytes, as they point to the
    # beginning of the callq instruction. Empirically this is true on x86 and
    # x86_64.
    return set(int(line.strip(), 16) + 4 for line in lines)


def PrintMissing(binary):
    """Print the instrumented PCs of `binary` that are not listed on stdin.

    stdin holds one hex PC per line; blank lines are ignored. Raises
    Exception when `binary` is not an existing file.
    """
    if not os.path.isfile(binary):
        raise Exception("File not found: %s" % binary)
    instrumented = GetInstrumentedPCs(binary)
    sys.stderr.write(
        "%s: found %d instrumented PCs in %s\n" % (prog_name, len(instrumented), binary)
    )
    covered = set(int(line, 16) for line in sys.stdin if line.strip())
    sys.stderr.write("%s: read %d PCs from stdin\n" % (prog_name, len(covered)))
    missing = instrumented - covered
    sys.stderr.write("%s: %d PCs missing from coverage\n" % (prog_name, len(missing)))
    if not covered.issubset(instrumented):
        sys.stderr.write(
            "%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
        )
    for pc in sorted(missing):
        print("0x%x" % pc)


if __name__ == "__main__":
    prog_name = sys.argv[0]
    if len(sys.argv) <= 2:
        Usage()

    if sys.argv[1] == "missing":
        if len(sys.argv) != 3:
            Usage()
        PrintMissing(sys.argv[2])
        sys.exit(0)

    # Expand glob patterns ourselves so quoted patterns work too.
    file_list = []
    for f in sys.argv[2:]:
        file_list += glob.glob(f)
    if not file_list:
        Usage()

    if sys.argv[1] == "print":
        PrintFiles(file_list)
    elif sys.argv[1] == "merge":
        MergeAndPrint(file_list)
    elif sys.argv[1] == "unpack":
        Unpack(file_list)
    elif sys.argv[1] == "rawunpack":
        RawUnpack(file_list)
    else:
        Usage()