xref: /llvm-project/compiler-rt/lib/sanitizer_common/scripts/sancov.py (revision f98ee40f4b5d7474fc67e82824bf6abbaedb7b1c)
1#!/usr/bin/env python
2# Merge or print the coverage data collected by asan's coverage.
# Input files are an 8-byte magic header followed by a sequence of
# 32-bit or 64-bit integers (PCs).
# We need to merge these integers into a set and then
# either print them (as hex) or dump them into another file.
6import array
7import bisect
8import glob
9import os.path
10import struct
11import subprocess
12import sys
13
# Name used as a prefix in usage text and diagnostics written to stderr.
# Empty on import; the script entry point overwrites it with sys.argv[0].
prog_name = ""
15
16
def Usage():
    """Write the command-line synopsis to stderr and terminate the process.

    Raises:
        SystemExit: always, with exit status 1.
    """
    subcommands = (
        "merge FILE [FILE...] > OUTPUT",
        "print FILE [FILE...]",
        "unpack FILE [FILE...]",
        "rawunpack FILE [FILE ...]",
        "missing BINARY < LIST_OF_PCS",
    )
    text = ["Usage: \n"]
    text.extend(" %s %s\n" % (prog_name, synopsis) for synopsis in subcommands)
    text.append("\n")
    sys.stderr.write("".join(text))
    # Use sys.exit() instead of the exit() builtin: the latter is injected by
    # the `site` module for interactive sessions and is absent under
    # `python -S`.
    sys.exit(1)
27
28
def CheckBits(bits):
    """Validate that *bits* is a supported PC width.

    Returns None when bits is 32 or 64.

    Raises:
        Exception: if bits is any other value.
    """
    if bits in (32, 64):
        return
    raise Exception("Wrong bitness: %d" % bits)
32
33
def TypeCodeForBits(bits):
    """Return the type code "L" for 64-bit values and "I" for 32-bit values.

    The width is validated with CheckBits first, so any other value raises.
    NOTE(review): presumably meant as an `array` module type code (compare
    TypeCodeForStruct); nothing in this file calls it -- confirm before use.
    """
    CheckBits(bits)
    if bits == 64:
        return "L"
    return "I"
37
38
def TypeCodeForStruct(bits):
    """Return the `struct` format character for one PC of the given width.

    "Q" for 64-bit PCs, "I" for 32-bit PCs. The width is validated with
    CheckBits first, so any other value raises.
    """
    CheckBits(bits)
    if bits == 64:
        return "Q"
    return "I"
42
43
# A coverage file starts with an 8-byte magic header read and written as two
# 32-bit words. One word is always kMagicFirstHalf; the other selects the PC
# width: kMagic32SecondHalf for 32-bit PCs, kMagic64SecondHalf for 64-bit PCs.
# The order of the two words depends on the host byte order.
kMagic32SecondHalf = 0xFFFFFF32
kMagic64SecondHalf = 0xFFFFFF64
kMagicFirstHalf = 0xC0BFFFFF
47
48
def MagicForBits(bits):
    """Return the two 32-bit magic words for a file holding *bits*-wide PCs.

    The list is ordered for the host byte order: on little-endian hosts the
    width-specific word comes first, otherwise kMagicFirstHalf comes first.

    Raises:
        Exception: (from CheckBits) if bits is neither 32 nor 64.
    """
    CheckBits(bits)
    second_half = kMagic64SecondHalf if bits == 64 else kMagic32SecondHalf
    words = [second_half, kMagicFirstHalf]
    if sys.byteorder != "little":
        words.reverse()
    return words
61
62
def ReadMagicAndReturnBitness(f, path):
    """Consume the 8-byte magic header from *f* and return the PC width.

    Args:
        f: binary file object positioned at the start of the header.
        path: file name, used only in the error message.

    Returns:
        32 or 64, depending on which magic word was found.

    Raises:
        Exception: if the header does not match either known magic value.
    """
    words = struct.unpack("II", f.read(8))
    # The two halves are stored in host word order.
    if sys.byteorder == "little":
        second_half, first_half = words
    else:
        first_half, second_half = words
    if first_half == kMagicFirstHalf:
        if second_half == kMagic64SecondHalf:
            return 64
        if second_half == kMagic32SecondHalf:
            return 32
    raise Exception("Bad magic word in %s" % path)
76
77
def ReadOneFile(path):
    """Read every PC stored in one coverage file.

    The file must start with the 8-byte magic header; the remaining bytes are
    decoded as native-order integers of the width the header announces.
    A one-line summary is written to stderr.

    Returns:
        A tuple of PCs in file order.

    Raises:
        Exception: if the file is shorter than the header or the magic is bad.
    """
    with open(path, mode="rb") as sancov_file:
        # Determine the file size by seeking to the end and back.
        sancov_file.seek(0, 2)
        total_bytes = sancov_file.tell()
        sancov_file.seek(0, 0)
        if total_bytes < 8:
            raise Exception("File %s is short (< 8 bytes)" % path)
        bits = ReadMagicAndReturnBitness(sancov_file, path)
        payload_bytes = total_bytes - 8
        pc_count = payload_bytes * 8 // bits
        layout = TypeCodeForStruct(bits) * pc_count
        pcs = struct.unpack_from(layout, sancov_file.read(payload_bytes))
    sys.stderr.write(
        "%s: read %d %d-bit PCs from %s\n" % (prog_name, pc_count, bits, path)
    )
    return pcs
91
92
def Merge(files):
    """Read all coverage files and return the union of their PCs.

    Args:
        files: sequence of coverage file paths.

    Returns:
        A sorted list of the distinct PCs found across all files.
        A summary line is written to stderr.
    """
    merged = set()
    for path in files:
        # update() grows the set in place; rebinding to union() would copy
        # the whole accumulated set once per input file.
        merged.update(ReadOneFile(path))
    sys.stderr.write(
        "%s: %d files merged; %d PCs total\n" % (prog_name, len(files), len(merged))
    )
    return sorted(merged)
101
102
def PrintFiles(files):
    """Print the PCs from *files* to stdout, one "0x..." value per line.

    Several files are merged (deduplicated and sorted) first. A single file
    is printed as-is, in the order its PCs are stored.
    """
    if len(files) <= 1:
        # If there is just one file, print the PCs in file order.
        pcs = ReadOneFile(files[0])
        sys.stderr.write("%s: 1 file merged; %d PCs total\n" % (prog_name, len(pcs)))
    else:
        pcs = Merge(files)
    for pc in pcs:
        print("0x%x" % pc)
111
112
def MergeAndPrint(files):
    """Merge *files* and write the result to stdout as a binary coverage file.

    Output is the magic header followed by the sorted, deduplicated PCs. The
    64-bit format is used only if some PC does not fit in 32 bits. If stdout
    is a terminal, the usage text is shown instead of dumping binary data.
    """
    if sys.stdout.isatty():
        Usage()
    pcs = Merge(files)
    # Guard the max() call: inputs that contain only a magic header merge to
    # an empty list, for which max() raises ValueError. In that case emit a
    # valid header-only 32-bit file.
    bits = 64 if pcs and max(pcs) > 0xFFFFFFFF else 32
    # Python 3 needs the underlying binary buffer; Python 2's stdout already
    # accepts bytes.
    stdout_buf = getattr(sys.stdout, "buffer", sys.stdout)
    array.array("I", MagicForBits(bits)).tofile(stdout_buf)
    stdout_buf.write(struct.pack(TypeCodeForStruct(bits) * len(pcs), *pcs))
124
125
def UnpackOneFile(path):
    """Split a packed coverage file into per-module, per-pid .sancov files.

    The packed file is a sequence of records. Each record is a header of
    three native integers (pid, module name length in bytes, blob size in
    bytes), followed by the UTF-8 module name and then the blob. Every blob
    is appended to "<module>.<pid>.sancov".

    Raises:
        Exception: if the file ends in the middle of a record.
    """
    header_struct = struct.Struct("iII")
    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
        while True:
            header = f.read(header_struct.size)
            if not header:
                # Clean end of file: no partial record pending.
                return
            if len(header) < header_struct.size:
                raise Exception("Error reading file %s" % path)
            pid, module_length, blob_size = header_struct.unpack(header)
            module_bytes = f.read(module_length)
            blob = f.read(blob_size)
            # Compare byte counts before decoding: module_length counts
            # bytes, which differs from the character count for non-ASCII
            # names. Raise explicitly so the check survives `python -O`.
            if len(module_bytes) != module_length or len(blob) != blob_size:
                raise Exception("Error reading file %s" % path)
            module = module_bytes.decode("utf-8")
            extracted_file = "%s.%d.sancov" % (module, pid)
            sys.stderr.write("%s: extracting %s\n" % (prog_name, extracted_file))
            # The packed file may contain multiple blobs for the same pid/module
            # pair. Append to the end of the file instead of overwriting.
            with open(extracted_file, "ab") as f2:
                f2.write(blob)
148
149
def Unpack(files):
    """Unpack each packed coverage file in *files*, in the order given."""
    for packed_path in files:
        UnpackOneFile(packed_path)
153
154
def UnpackOneRawFile(path, map_path):
    """Convert one raw coverage dump into per-module coverage files.

    The map file's first line is the PC width in bits (32 or 64). Every other
    non-blank line is "START END BASE MODULE_PATH" with the three addresses in
    hex; MODULE_PATH may contain spaces. The raw file is a flat array of
    native-order PCs of that width.

    Each non-zero PC is matched to the mapping whose [START, END) range
    contains it and rebased by subtracting BASE. For every module with at
    least one PC, the magic header and the sorted PCs are appended to
    "<MODULE_PATH>.<basename of path without the trailing '.raw'>".
    PCs that fall outside every mapping are reported on stderr and dropped.

    Args:
        path: raw coverage file; its name must end in ".sancov.raw".
        map_path: the matching memory-map text file.

    Raises:
        Exception: if the map declares an unsupported width, or if output
            has to be written and path does not end in ".sancov.raw".
    """
    mem_map = []
    with open(map_path, mode="rt") as f_map:
        sys.stderr.write("%s: reading map %s\n" % (prog_name, map_path))
        bits = int(f_map.readline())
        if bits != 32 and bits != 64:
            raise Exception("Wrong bits size in the map")
        for line in f_map:
            parts = line.split()
            if not parts:
                # Tolerate blank lines (e.g. a trailing newline).
                continue
            mem_map.append(
                (
                    int(parts[0], 16),
                    int(parts[1], 16),
                    int(parts[2], 16),
                    " ".join(parts[3:]),
                )
            )
    # Sort by start address so each PC can be located with a binary search.
    mem_map.sort(key=lambda m: m[0])
    mem_map_keys = [m[0] for m in mem_map]

    with open(path, mode="rb") as f:
        sys.stderr.write("%s: unpacking %s\n" % (prog_name, path))
        raw = f.read()
    pcs = struct.unpack_from(
        TypeCodeForStruct(bits) * (len(raw) * 8 // bits), raw
    )

    mem_map_pcs = [[] for _ in mem_map]
    for pc in pcs:
        if pc == 0:
            continue
        # Index of the last mapping whose start is <= pc, or -1 if there is
        # none (pc below the lowest mapping, or an empty map). The -1 case
        # must be rejected explicitly: as a list index it would silently
        # select the last mapping.
        map_idx = bisect.bisect(mem_map_keys, pc) - 1
        if map_idx < 0 or pc >= mem_map[map_idx][1]:
            sys.stderr.write(
                "warning: %s: pc %x outside of any known mapping\n"
                % (prog_name, pc)
            )
            continue
        mem_map_pcs[map_idx].append(pc - mem_map[map_idx][2])

    for mapping, pc_list in zip(mem_map, mem_map_pcs):
        if not pc_list:
            continue
        if not path.endswith(".sancov.raw"):
            raise Exception("Unexpected raw file name %s" % path)
        module_path = mapping[3]
        # Strip the trailing ".raw" so the output ends in ".sancov".
        dst_path = module_path + "." + os.path.basename(path)[:-4]
        sys.stderr.write(
            "%s: writing %d PCs to %s\n" % (prog_name, len(pc_list), dst_path)
        )
        sorted_pc_list = sorted(pc_list)
        pc_buffer = struct.pack(
            TypeCodeForStruct(bits) * len(sorted_pc_list), *sorted_pc_list
        )
        # NOTE(review): the file is opened in append mode, so if dst_path
        # already exists (an earlier run, or two mappings with the same
        # module path) a second magic header lands in the middle of the
        # file -- confirm whether that is intended.
        with open(dst_path, "ab+") as f2:
            array.array("I", MagicForBits(bits)).tofile(f2)
            f2.seek(0, 2)
            f2.write(pc_buffer)
216
217
def RawUnpack(files):
    """Unpack each raw coverage file together with its memory map.

    Every name must end in ".sancov.raw"; the map is expected next to it
    under the same name with "raw" replaced by "map".

    Raises:
        Exception: on the first file whose name has a different suffix.
    """
    raw_suffix = ".sancov.raw"
    for raw_path in files:
        if not raw_path.endswith(raw_suffix):
            raise Exception("Unexpected raw file name %s" % raw_path)
        # "x.sancov.raw" -> "x.sancov.map"
        UnpackOneRawFile(raw_path, raw_path[:-3] + "map")
224
225
def GetInstrumentedPCs(binary):
    """Return the set of instrumented PCs found by disassembling *binary*.

    Runs `objdump -d` through a shell pipeline and keeps the addresses of
    call instructions that target the coverage callbacks.

    Args:
        binary: path of the executable or library to inspect.

    Returns:
        A set of ints: the address of each matching call instruction plus 4.

    Raises:
        subprocess.CalledProcessError: if the pipeline exits non-zero.
    """
    try:
        from shlex import quote  # Python 3.3+
    except ImportError:
        from pipes import quote  # Python 2

    # This looks scary, but all it does is extract all offsets where we call:
    # - __sanitizer_cov(), __sanitizer_cov_with_check() or
    #   __sanitizer_cov_trace_pc_guard(),
    # - with call or callq,
    # - directly or via PLT.
    # The path is shell-quoted because the command runs with shell=True; an
    # unquoted name containing spaces or metacharacters would break the
    # pipeline or run arbitrary commands.
    cmd = (
        r"objdump --no-show-raw-insn -d %s | "
        r"grep '^\s\+[0-9a-f]\+:\s\+call\(q\|\)\s\+\(0x\|\)[0-9a-f]\+ <__sanitizer_cov\(_with_check\|\|_trace_pc_guard\)\(@plt\|\)>' | "
        r"grep -o '^\s\+[0-9a-f]\+'" % quote(binary)
    )
    lines = subprocess.check_output(cmd, stdin=subprocess.PIPE, shell=True).splitlines()
    # The PCs we get from objdump are off by 4 bytes, as they point to the
    # beginning of the callq instruction. Empirically this is true on x86 and
    # x86_64.
    return set(int(line.strip(), 16) + 4 for line in lines)
241
242
def PrintMissing(binary):
    """Print the instrumented PCs of *binary* that are absent from stdin.

    Stdin supplies the covered PCs, one hex value per line. The uncovered
    PCs are printed to stdout in ascending order as "0x..." lines; progress
    and a warning about unknown PCs go to stderr.

    Raises:
        Exception: if *binary* is not an existing file.
    """
    if not os.path.isfile(binary):
        raise Exception("File not found: %s" % binary)
    instrumented = GetInstrumentedPCs(binary)
    sys.stderr.write(
        "%s: found %d instrumented PCs in %s\n" % (prog_name, len(instrumented), binary)
    )
    covered = set()
    for line in sys.stdin:
        covered.add(int(line, 16))
    sys.stderr.write("%s: read %d PCs from stdin\n" % (prog_name, len(covered)))
    missing = instrumented.difference(covered)
    sys.stderr.write("%s: %d PCs missing from coverage\n" % (prog_name, len(missing)))
    # Same condition as len(missing) > len(instrumented) - len(covered):
    # it holds exactly when some covered PC is not an instrumented one.
    if not covered.issubset(instrumented):
        sys.stderr.write(
            "%s: WARNING: stdin contains PCs not found in binary\n" % prog_name
        )
    for pc in sorted(missing):
        print("0x%x" % pc)
259        print("0x%x" % pc)
260
261
if __name__ == "__main__":
    # Script entry point: sys.argv[1] selects the subcommand and the
    # remaining arguments are its inputs.
    prog_name = sys.argv[0]
    if len(sys.argv) <= 2:
        Usage()

    command = sys.argv[1]
    if command == "missing":
        # "missing" takes exactly one binary and reads the PCs from stdin.
        if len(sys.argv) != 3:
            Usage()
        PrintMissing(sys.argv[2])
        # Use sys.exit() instead of the exit() builtin, which is injected by
        # the `site` module for interactive use and is absent under
        # `python -S`.
        sys.exit(0)

    # Every other subcommand takes files. Each argument is treated as a glob
    # pattern; patterns that match nothing contribute no files.
    file_list = []
    for pattern in sys.argv[2:]:
        file_list.extend(glob.glob(pattern))
    if not file_list:
        Usage()

    handlers = {
        "print": PrintFiles,
        "merge": MergeAndPrint,
        "unpack": Unpack,
        "rawunpack": RawUnpack,
    }
    handler = handlers.get(command)
    if handler is None:
        Usage()
    else:
        handler(file_list)
289