#!/usr/bin/env python
# xref: /openbsd-src/gnu/llvm/compiler-rt/lib/sanitizer_common/scripts/sancov.py (revision d89ec533011f513df1010f142a111086a0785f09)
# Merge or print the coverage data collected by asan's coverage.
# Input files are an 8-byte magic header followed by a sequence of 32-bit or
# 64-bit integers (PCs).
# We need to merge these integers into a set and then
# either print them (as hex) or dump them into another file.
import array
import bisect
import glob
import os.path
import re
import struct
import subprocess
import sys

# Name this script was invoked as. It is overwritten with sys.argv[0] by the
# __main__ block and used as the prefix of usage and diagnostic messages.
prog_name = ""

def Usage():
  """Write the command-line synopsis to stderr and exit with status 1.

  Raises:
    SystemExit: always, with exit code 1.
  """
  synopsis = (
    "merge FILE [FILE...] > OUTPUT",
    "print FILE [FILE...]",
    "unpack FILE [FILE...]",
    "rawunpack FILE [FILE ...]",
    "missing BINARY < LIST_OF_PCS",
  )
  text = ["Usage: \n"]
  for args in synopsis:
    text.append(" " + prog_name + " " + args + "\n")
  text.append("\n")
  sys.stderr.write("".join(text))
  # sys.exit rather than the bare exit(): the latter is a helper injected by
  # the site module and is not available when site is disabled.
  sys.exit(1)

def CheckBits(bits):
  """Validate a PC width.

  Args:
    bits: PC width in bits; only 32 and 64 are accepted.

  Raises:
    Exception: if bits is neither 32 nor 64.
  """
  if bits not in (32, 64):
    raise Exception("Wrong bitness: %d" % bits)

def TypeCodeForBits(bits):
  """Return the array-module type code for PCs of the given width.

  Args:
    bits: PC width in bits (32 or 64).

  Returns:
    'L' for 64-bit PCs, 'I' for 32-bit PCs.

  Raises:
    Exception: if bits is neither 32 nor 64.
  """
  CheckBits(bits)
  # NOTE(review): array's 'L' is C unsigned long, whose size is
  # platform-dependent, so it is presumably not 8 bytes everywhere -- confirm
  # before using this for 64-bit data on platforms with a 32-bit long.
  if bits == 64:
    return 'L'
  return 'I'

def TypeCodeForStruct(bits):
  """Return the struct-module format character for PCs of the given width.

  Args:
    bits: PC width in bits (32 or 64).

  Returns:
    'Q' for 64-bit PCs, 'I' for 32-bit PCs.

  Raises:
    Exception: if bits is neither 32 nor 64.
  """
  CheckBits(bits)
  if bits == 64:
    return 'Q'
  return 'I'

# The 8-byte file magic, handled as two 32-bit words. Read as one 64-bit
# value it is 0xC0BFFFFFFFFFFF32 for 32-bit PCs and 0xC0BFFFFFFFFFFF64 for
# 64-bit PCs.
kMagic32SecondHalf = 0xFFFFFF32
kMagic64SecondHalf = 0xFFFFFF64
kMagicFirstHalf    = 0xC0BFFFFF

def MagicForBits(bits):
  """Return the file magic for the given PC width as two 32-bit words.

  The words are ordered for the host byte order, so that writing them out as
  native 32-bit integers produces the 8-byte magic as a native 64-bit value.

  Args:
    bits: PC width in bits (32 or 64).

  Returns:
    A two-element list of 32-bit integers.

  Raises:
    Exception: if bits is neither 32 nor 64.
  """
  CheckBits(bits)
  second_half = kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf
  if sys.byteorder == 'little':
    return [second_half, kMagicFirstHalf]
  return [kMagicFirstHalf, second_half]

def ReadMagicAndReturnBitness(f, path):
  """Consume the 8-byte magic from f and return the PC width it encodes.

  Args:
    f: binary file object positioned at the magic.
    path: file name, used only in the error message.

  Returns:
    32 or 64.

  Raises:
    Exception: if fewer than 8 bytes are available or the magic is not
      recognised.
  """
  magic_bytes = f.read(8)
  # A short read would otherwise surface as an unrelated struct.error.
  if len(magic_bytes) != 8:
    raise Exception('Bad magic word in %s' % path)
  magic_words = struct.unpack('II', magic_bytes)
  # The words are read in native byte order, so the position of the "first
  # half" depends on the host endianness.
  first_idx = 1 if sys.byteorder == 'little' else 0
  first_half = magic_words[first_idx]
  second_half = magic_words[1 - first_idx]
  if first_half == kMagicFirstHalf:
    if second_half == kMagic64SecondHalf:
      return 64
    if second_half == kMagic32SecondHalf:
      return 32
  raise Exception('Bad magic word in %s' % path)

def ReadOneFile(path):
  """Read all PCs from one coverage file.

  Args:
    path: file consisting of the 8-byte magic followed by native-endian
      32-bit or 64-bit PCs.

  Returns:
    A tuple of the PCs in file order.

  Raises:
    Exception: if the file is shorter than 8 bytes or has a bad magic.
  """
  with open(path, mode="rb") as f:
    f.seek(0, os.SEEK_END)
    size = f.tell()
    f.seek(0, os.SEEK_SET)
    if size < 8:
      raise Exception('File %s is short (< 8 bytes)' % path)
    bits = ReadMagicAndReturnBitness(f, path)
    payload = f.read(size - 8)
  # Count whole PCs in what was actually read; a trailing partial PC is
  # ignored.
  count = len(payload) * 8 // bits
  pcs = struct.unpack_from(TypeCodeForStruct(bits) * count, payload)
  sys.stderr.write(
    "%s: read %d %d-bit PCs from %s\n" % (prog_name, count, bits, path))
  return pcs

def Merge(files):
  """Read every file and return the union of their PCs.

  Args:
    files: list of coverage file paths.

  Returns:
    A sorted list of the distinct PCs found in all files.
  """
  merged = set()
  for path in files:
    # update() grows the set in place instead of copying it for every file.
    merged.update(ReadOneFile(path))
  sys.stderr.write(
    "%s: %d files merged; %d PCs total\n" % (prog_name, len(files), len(merged))
  )
  return sorted(merged)

def PrintFiles(files):
  """Print the PCs contained in files to stdout, one hex value per line.

  Args:
    files: non-empty list of coverage file paths. Several files are merged
      (sorted, duplicates removed); a single file is printed as stored.
  """
  if len(files) > 1:
    pcs = Merge(files)
  else:  # If there is just one file, print the PCs in file order.
    pcs = ReadOneFile(files[0])
    sys.stderr.write("%s: 1 file merged; %d PCs total\n" % (prog_name, len(pcs)))
  for pc in pcs:
    print("0x%x" % pc)

def MergeAndPrint(files):
  """Merge files and write the result to stdout in coverage-file format.

  The output is the 8-byte magic followed by the sorted, de-duplicated PCs.
  64-bit output is chosen only if some PC does not fit in 32 bits. Because
  the output is binary, usage is printed (and the script exits) when stdout
  is a terminal.

  Args:
    files: list of coverage file paths.
  """
  if sys.stdout.isatty():
    Usage()
  pcs = Merge(files)
  # Merge() returns a sorted list, so the last element is the maximum. The
  # emptiness check avoids the ValueError max() raises on an empty sequence.
  bits = 64 if pcs and pcs[-1] > 0xFFFFFFFF else 32
  # Python 3 needs the underlying binary buffer; Python 2's stdout has none.
  stdout_buf = getattr(sys.stdout, 'buffer', sys.stdout)
  array.array('I', MagicForBits(bits)).tofile(stdout_buf)
  stdout_buf.write(struct.pack(TypeCodeForStruct(bits) * len(pcs), *pcs))


def UnpackOneFile(path):
  """Split one packed file into per-module, per-pid .sancov files.

  The packed file is a sequence of records. Each record is a 12-byte header
  (pid, module name length, blob size) followed by the UTF-8 module name and
  the blob. Every blob is appended to "<module>.<pid>.sancov".

  Args:
    path: packed file to read.

  Raises:
    Exception: if the file ends in the middle of a record.
  """
  with open(path, mode="rb") as f:
    sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
    while True:
      header = f.read(12)
      if not header:
        return  # Clean end of file.
      if len(header) < 12:
        raise Exception('Error reading file %s' % path)
      pid, module_length, blob_size = struct.unpack('iII', header)
      module_bytes = f.read(module_length)
      blob = f.read(blob_size)
      # Compare byte counts before decoding: the header length is in bytes,
      # which differs from the character count for non-ASCII names.
      if len(module_bytes) != module_length or len(blob) != blob_size:
        raise Exception('Error reading file %s' % path)
      module = module_bytes.decode('utf-8')
      extracted_file = "%s.%d.sancov" % (module, pid)
      sys.stderr.write("%s: extracting %s\n" % (prog_name, extracted_file))
      # The packed file may contain multiple blobs for the same pid/module
      # pair. Append to the end of the file instead of overwriting.
      with open(extracted_file, 'ab') as f2:
        f2.write(blob)


def Unpack(files):
  """Unpack every packed coverage file in files.

  Args:
    files: list of packed file paths.
  """
  for path in files:
    UnpackOneFile(path)

def UnpackOneRawFile(path, map_path):
  """Split a raw PC dump into per-module .sancov files using its memory map.

  The map file's first line is the PC width in bits (32 or 64). Every other
  line is "START END BASE MODULE_PATH" with the three numbers in hex. Each
  non-zero PC in the raw file is attributed to the mapping containing it and
  stored relative to that mapping's BASE.

  Args:
    path: raw PC file; must end in '.sancov.raw' if any PCs are written.
    map_path: the matching map file.

  Raises:
    Exception: if the map declares a width other than 32 or 64.
  """
  mem_map = []
  with open(map_path, mode="rt") as f_map:
    sys.stderr.write("%s: reading map %s\n" % (prog_name, map_path))
    bits = int(f_map.readline())
    if bits != 32 and bits != 64:
      raise Exception('Wrong bits size in the map')
    for line in f_map:
      parts = line.rstrip().split()
      if not parts:
        continue  # Tolerate blank lines, e.g. at the end of the file.
      mem_map.append((int(parts[0], 16),
                  int(parts[1], 16),
                  int(parts[2], 16),
                  ' '.join(parts[3:])))
  mem_map.sort(key=lambda m : m[0])
  mem_map_keys = [m[0] for m in mem_map]

  with open(path, mode="rb") as f:
    sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))

    f.seek(0, 2)
    size = f.tell()
    f.seek(0, 0)
    pcs = struct.unpack_from(TypeCodeForStruct(bits) * (size * 8 // bits), f.read(size))

  # One bucket of module-relative PCs per mapping, parallel to mem_map.
  mem_map_pcs = [[] for _ in mem_map]

  for pc in pcs:
    if pc == 0: continue
    # Index of the last mapping whose start is <= pc.
    map_idx = bisect.bisect(mem_map_keys, pc) - 1
    if map_idx < 0:
      # pc lies below every mapping (or there are none). Without this check
      # index -1 would silently select the last mapping.
      sys.stderr.write("warning: %s: pc %x outside of any known mapping\n" % (prog_name, pc))
      continue
    (start, end, base, module_path) = mem_map[map_idx]
    assert pc >= start
    if pc >= end:
      sys.stderr.write("warning: %s: pc %x outside of any known mapping\n" % (prog_name, pc))
      continue
    mem_map_pcs[map_idx].append(pc - base)

  for ((start, end, base, module_path), pc_list) in zip(mem_map, mem_map_pcs):
    if len(pc_list) == 0: continue
    assert path.endswith('.sancov.raw')
    # Drop the trailing ".raw": foo.sancov.raw -> <module>.foo.sancov
    dst_path = module_path + '.' + os.path.basename(path)[:-4]
    sys.stderr.write("%s: writing %d PCs to %s\n" % (prog_name, len(pc_list), dst_path))
    sorted_pc_list = sorted(pc_list)
    pc_buffer = struct.pack(TypeCodeForStruct(bits) * len(sorted_pc_list), *sorted_pc_list)
    # NOTE(review): the file is opened for append and the magic is written on
    # every call, so a pre-existing dst_path (or a module with several
    # mappings) ends up with a magic in the middle of the PC data. Confirm
    # whether that is intended.
    with open(dst_path, 'ab+') as f2:
      array.array('I', MagicForBits(bits)).tofile(f2)
      f2.seek(0, 2)
      f2.write(pc_buffer)

def RawUnpack(files):
  """Unpack every raw coverage file, using the .map file stored next to it.

  Args:
    files: list of paths, each of which must end in '.sancov.raw'.

  Raises:
    Exception: if a path does not end in '.sancov.raw'.
  """
  for raw_path in files:
    if not raw_path.endswith('.sancov.raw'):
      raise Exception('Unexpected raw file name %s' % raw_path)
    # foo.sancov.raw -> foo.sancov.map
    map_path = raw_path[:-3] + 'map'
    UnpackOneRawFile(raw_path, map_path)

def GetInstrumentedPCs(binary):
  """Return the set of instrumented PCs found by disassembling binary.

  objdump is invoked directly (no shell), so a path containing spaces or
  shell metacharacters is passed through unchanged. Its output is filtered in
  Python rather than with grep.

  Args:
    binary: path of the binary to disassemble.

  Returns:
    A set of integers; empty if no coverage call is found.

  Raises:
    subprocess.CalledProcessError: if objdump exits with a non-zero status.
    OSError: if objdump cannot be started.
  """
  # Matches every disassembly line where we call:
  # - __sanitizer_cov(), __sanitizer_cov_with_check() or
  #   __sanitizer_cov_trace_pc_guard(),
  # - with call or callq,
  # - directly or via PLT,
  # and captures the address of the call instruction. The pattern is bytes so
  # that objdump output never has to be decoded.
  call_re = re.compile(
    br"^\s+([0-9a-f]+):\s+callq?\s+(?:0x)?[0-9a-f]+ "
    br"<__sanitizer_cov(?:_with_check|_trace_pc_guard)?(?:@plt)?>")
  # stdin is redirected so that objdump cannot consume this script's stdin,
  # which PrintMissing reads afterwards.
  disassembly = subprocess.check_output(
    ["objdump", "--no-show-raw-insn", "-d", binary], stdin=subprocess.PIPE)
  # The PCs we get from objdump are off by 4 bytes, as they point to the
  # beginning of the callq instruction. Empirically this is true on x86 and
  # x86_64.
  pcs = set()
  for line in disassembly.splitlines():
    m = call_re.match(line)
    if m:
      pcs.add(int(m.group(1), 16) + 4)
  return pcs

def PrintMissing(binary):
  """Print the instrumented PCs of binary that are absent from stdin.

  stdin supplies the covered PCs, one hex value per line; blank lines are
  ignored. The missing PCs are printed to stdout in ascending order, and
  statistics go to stderr.

  Args:
    binary: path of the instrumented binary.

  Raises:
    Exception: if binary is not an existing file.
  """
  if not os.path.isfile(binary):
    raise Exception('File not found: %s' % binary)
  instrumented = GetInstrumentedPCs(binary)
  sys.stderr.write("%s: found %d instrumented PCs in %s\n" % (prog_name,
                                                              len(instrumented),
                                                              binary))
  covered = set(int(line, 16) for line in sys.stdin if line.strip())
  sys.stderr.write("%s: read %d PCs from stdin\n" % (prog_name, len(covered)))
  missing = instrumented - covered
  sys.stderr.write("%s: %d PCs missing from coverage\n" % (prog_name, len(missing)))
  # Warn when some covered PC is not an instrumented PC of this binary.
  if not covered.issubset(instrumented):
    sys.stderr.write(
      "%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
    )
  for pc in sorted(missing):
    print("0x%x" % pc)

if __name__ == '__main__':
  # Command-line entry point: sancov.py COMMAND ARG [ARG...]
  prog_name = sys.argv[0]
  if len(sys.argv) <= 2:
    Usage()

  command = sys.argv[1]

  # "missing" takes exactly one binary and reads the PCs from stdin, so it is
  # handled before the file arguments are glob-expanded.
  if command == "missing":
    if len(sys.argv) != 3:
      Usage()
    PrintMissing(sys.argv[2])
    sys.exit(0)

  # Expand wildcards ourselves; arguments matching no file are dropped.
  file_list = []
  for pattern in sys.argv[2:]:
    file_list += glob.glob(pattern)
  if not file_list:
    Usage()

  if command == "print":
    PrintFiles(file_list)
  elif command == "merge":
    MergeAndPrint(file_list)
  elif command == "unpack":
    Unpack(file_list)
  elif command == "rawunpack":
    RawUnpack(file_list)
  else:
    Usage()