xref: /llvm-project/lldb/examples/python/bsd.py (revision 2238dcc39358353cac21df75c3c3286ab20b8f53)
1#!/usr/bin/env python
2
3import cmd
4import optparse
5import os
6import shlex
7import struct
8import sys
9
# Magic string expected at the very start of an archive file.
ARMAG = "!<arch>\n"
# Number of characters in the archive magic.
SARMAG = len(ARMAG)
# Two-character terminator expected at the end of every member header.
ARFMAG = "`\n"
# Prefix of a header name field that announces an extended name; the digits
# following the prefix give the length of the name stored after the header.
AR_EFMT1 = "#1/"
14
15
def memdump(src, bytes_per_line=16, address=0):
    """Print a hex + ASCII dump of ``src`` to standard output.

    Args:
        src: the data to dump, either ``bytes``/``bytearray`` or ``str``.
        bytes_per_line: how many items are shown on each output line.
        address: the address printed for the first item of ``src``.

    Each output line holds the address of its first item, the items as
    two-digit hex values and the same items as text, with every character
    that has no short printable representation shown as ".".
    """
    # Translation table for code points 0-255: a character is kept only when
    # its repr() is exactly the character wrapped in quotes (3 characters).
    printable = "".join(
        chr(x) if len(repr(chr(x))) == 3 else "." for x in range(256)
    )
    for i in range(0, len(src), bytes_per_line):
        chunk = src[i : i + bytes_per_line]
        if isinstance(chunk, (bytes, bytearray)):
            # Iterating bytes yields ints in Python 3; latin-1 maps each byte
            # to the character with the same code point so ord() works below.
            chunk = bytes(chunk).decode("latin-1")
        hex_bytes = " ".join("%02x" % ord(c) for c in chunk)
        text = chunk.translate(printable)
        print("%#08.8x: %-*s %s" % (address + i, bytes_per_line * 3, hex_bytes, text))
23
24
class Object(object):
    """A single member stored inside a BSD archive.

    Constructing an Object parses one member header starting at the current
    position of ``file`` and leaves the file positioned at the next member
    header. The member contents are not loaded; they are read on demand
    from ``self.file``.

    Attributes set by the constructor:
        offset:     file offset of the member header
        file:       the file object the member was parsed from
        name:       member name (the extended name when one is present)
        date:       modification time, parsed from a decimal header field
        uid, gid:   owner and group ids, parsed from decimal header fields
        mode:       file mode, parsed from an octal header field
        size:       size recorded in the header (includes any extended name)
        obj_offset: file offset of the member contents
        obj_size:   size of the member contents without the extended name
    """

    @staticmethod
    def _as_text(data):
        """Return ``data`` as ``str``; ``bytes`` are decoded one byte per character."""
        if isinstance(data, bytes):
            # latin-1 never fails and keeps byte offsets equal to character
            # offsets, which is all the fixed-width header fields need.
            return data.decode("latin-1")
        return data

    def __init__(self, file):
        """Parse the member header at the current position of ``file``.

        ``file`` may be opened in binary or text mode.

        Raises:
            ValueError: if a numeric header field cannot be parsed (this is
                also what happens at end of file) or the header terminator
                is missing.
        """
        as_text = self._as_text

        def read_str(file, str_len):
            # Fixed-width fields are padded with spaces and/or NULs.
            return as_text(file.read(str_len)).rstrip("\0 ")

        def read_int(file, str_len, base):
            return int(read_str(file, str_len), base)

        self.offset = file.tell()
        self.file = file
        self.name = read_str(file, 16)
        self.date = read_int(file, 12, 10)
        self.uid = read_int(file, 6, 10)
        self.gid = read_int(file, 6, 10)
        self.mode = read_int(file, 8, 8)
        self.size = read_int(file, 10, 10)
        if as_text(file.read(2)) != ARFMAG:
            raise ValueError("invalid BSD object at offset %#08.8x" % (self.offset))
        # If we have an extended name read it. Extended names start with
        # AR_EFMT1 followed by the decimal length of the name, and the name
        # itself is stored right after the header, counted in self.size.
        name_len = 0
        if self.name.startswith(AR_EFMT1):
            name_len = int(self.name[len(AR_EFMT1) :], 10)
            self.name = read_str(file, name_len)
        self.obj_offset = file.tell()
        self.obj_size = self.size - name_len
        # Members start on even offsets: an odd-sized member is followed by
        # one pad byte that is not counted in its size field.
        next_header = self.obj_offset + self.obj_size + (self.size % 2)
        # Seek absolutely; text-mode files reject non-zero relative seeks.
        file.seek(next_header, 0)

    def dump(self, f=sys.stdout, flat=True):
        """Write a description of this member to ``f``.

        With ``flat`` set, a single line of columns is written. Otherwise
        each field goes on its own line, followed by a dump of the first
        four bytes of the contents. That dump is produced by memdump(),
        which prints to standard output rather than to ``f``.
        """
        if flat:
            f.write(
                "%#08.8x: %#08.8x %5u %5u %6o %#08.8x %s\n"
                % (
                    self.offset,
                    self.date,
                    self.uid,
                    self.gid,
                    self.mode,
                    self.size,
                    self.name,
                )
            )
        else:
            f.write("%#08.8x: \n" % self.offset)
            f.write(' name = "%s"\n' % self.name)
            f.write(" date = %#08.8x\n" % self.date)
            f.write("  uid = %i\n" % self.uid)
            f.write("  gid = %i\n" % self.gid)
            f.write(" mode = %o\n" % self.mode)
            f.write(" size = %#08.8x\n" % (self.size))
            self.file.seek(self.obj_offset, 0)
            # Hand memdump() text so it works whether the archive file was
            # opened in binary or in text mode.
            first_bytes = self._as_text(self.file.read(4))
            f.write("bytes = ")
            memdump(first_bytes)

    def get_bytes(self):
        """Return the member contents, leaving the file position unchanged."""
        saved_pos = self.file.tell()
        try:
            self.file.seek(self.obj_offset, 0)
            return self.file.read(self.obj_size)
        finally:
            self.file.seek(saved_pos, 0)

    def save(self, path=None, overwrite=False):
        """
        Save the contents of the object to disk using 'path' argument as
        the path, or save it to the current working directory using the
        object name.

        Nothing is written, and an error is printed, when the destination
        already exists and 'overwrite' is false.
        """

        if path is None:
            path = self.name
        if not overwrite and os.path.exists(path):
            print('error: outfile "%s" already exists' % (path))
            return
        print('Saving "%s" to "%s"...' % (self.name, path))
        contents = self.get_bytes()
        # Contents read from a binary file are bytes and must be written
        # through a binary file so they reach the disk unmodified.
        mode = "wb" if isinstance(contents, bytes) else "w"
        with open(path, mode) as f:
            f.write(contents)
101
102
class StringTable(object):
    """A block of NUL-terminated strings addressed by offset."""

    def __init__(self, bytes):
        # The raw table; either ``bytes`` or ``str``.
        self.bytes = bytes

    def get_string(self, offset):
        """Return the string that starts at ``offset``.

        The string runs up to the next NUL, or to the end of the table when
        no NUL follows. Returns None when ``offset`` is at or past the end
        of the table. A ``bytes`` table yields ``str`` results (decoded as
        UTF-8, undecodable bytes replaced).
        """
        table = self.bytes
        if offset >= len(table):
            return None
        is_binary = isinstance(table, (bytes, bytearray))
        end = table.find(b"\0" if is_binary else "\0", offset)
        if end < 0:
            # No terminator: find() returned -1, which used as a slice end
            # would silently drop the last character.
            end = len(table)
        value = table[offset:end]
        if is_binary:
            return bytes(value).decode("utf-8", "replace")
        return value
112
113
class Archive(object):
    """A BSD archive file and the members it contains.

    Attributes:
        path:             the path the archive was opened from
        file:             the open archive file (binary mode); it stays open
                          because member contents are read on demand
        objects:          the members, in file order
        offset_to_object: cache used by get_object_at_offset()
    """

    def __init__(self, path):
        """Open ``path`` and parse every member header in it.

        When the file does not start with the archive magic an error is
        printed and ``objects`` is left empty.
        """
        self.path = path
        # Archives hold binary object files, so read them as bytes.
        self.file = open(path, "rb")
        self.objects = []
        self.offset_to_object = {}
        if self.file.read(SARMAG) != ARMAG.encode("ascii"):
            print("error: file isn't a BSD archive")
            return
        while True:
            try:
                self.objects.append(Object(self.file))
            except ValueError:
                # Raised for a malformed header and also at end of file.
                break

    def close(self):
        """Close the underlying archive file."""
        self.file.close()

    def get_object_at_offset(self, offset):
        """Return the member whose header is at ``offset``, or None."""
        cached = self.offset_to_object.get(offset)
        if cached is not None:
            return cached
        for obj in self.objects:
            if obj.offset == offset:
                self.offset_to_object[offset] = obj
                return obj
        return None

    def find(self, name, mtime=None, f=sys.stdout):
        """
        Find an object(s) by name with optional modification time. There
        can be multiple objects with the same name inside and possibly with
        the same modification time within a BSD archive so clients must be
        prepared to get multiple results.

        The 'f' argument is accepted for compatibility and is not used.
        """
        return [
            obj
            for obj in self.objects
            if obj.name == name and (mtime is None or mtime == obj.date)
        ]

    @classmethod
    def dump_header(cls, f=sys.stdout):
        """Write the column titles that go with Object.dump(flat=True)."""
        f.write("            DATE       UID   GID   MODE   SIZE       NAME\n")
        f.write(
            "            ---------- ----- ----- ------ ---------- " "--------------\n"
        )

    def get_symdef(self):
        """Return the symbol table of the archive.

        The first member whose name starts with "__.SYMDEF" is decoded into
        a list of (symbol name, member header offset) tuples. An empty list
        is returned when the archive has no such member.
        """

        def get_uint32(file):
            """Extract a uint32_t from the current file position."""
            (v,) = struct.unpack("=I", file.read(4))
            return v

        for obj in self.objects:
            if not obj.name.startswith("__.SYMDEF"):
                continue
            self.file.seek(obj.obj_offset, 0)
            ranlib_byte_size = get_uint32(self.file)
            # Each entry is two uint32 values (8 bytes). Integer division is
            # required: range() rejects the float produced by "/".
            num_ranlib_structs = ranlib_byte_size // 8
            str_offset_pairs = []
            for _ in range(num_ranlib_structs):
                strx = get_uint32(self.file)
                offset = get_uint32(self.file)
                str_offset_pairs.append((strx, offset))
            strtab_len = get_uint32(self.file)
            raw_strtab = self.file.read(strtab_len)
            if isinstance(raw_strtab, bytes):
                # latin-1 maps one byte to one character, so the byte
                # offsets stored in the entries stay valid string indexes.
                raw_strtab = raw_strtab.decode("latin-1")
            strtab = StringTable(raw_strtab)
            return [
                (strtab.get_string(strx), offset)
                for strx, offset in str_offset_pairs
            ]
        return []

    def get_object_dicts(self):
        """
        Returns an array of object dictionaries that contain the following
        keys:
            'object': the actual bsd.Object instance (None when no member
                      starts at the offset recorded in the symbol table)
            'symdefs': an array of symbol names that the object contains
                       as found in the "__.SYMDEF" item in the archive
        The array is ordered by the member offsets from the symbol table.
        """
        symdef_dict = {}
        for name, offset in self.get_symdef() or []:
            object_dict = symdef_dict.get(offset)
            if object_dict is None:
                object_dict = {
                    "object": self.get_object_at_offset(offset),
                    "symdefs": [],
                }
                symdef_dict[offset] = object_dict
            object_dict["symdefs"].append(name)
        return [symdef_dict[offset] for offset in sorted(symdef_dict)]

    def dump(self, f=sys.stdout, flat=True):
        """Write the archive path and a description of every member to ``f``."""
        f.write("%s:\n" % self.path)
        if flat:
            self.dump_header(f=f)
        for obj in self.objects:
            obj.dump(f=f, flat=flat)
212
213
class Interactive(cmd.Cmd):
    """Interactive prompt for exploring contents of BSD archive files, type
    "help" to see a list of supported commands."""

    image_option_parser = None

    def __init__(self, archives):
        """Create a prompt that operates on the list ``archives``."""
        cmd.Cmd.__init__(self)
        self.use_rawinput = False
        self.intro = (
            'Interactive  BSD archive prompt, type "help" to see a '
            "list of supported commands."
        )
        self.archives = archives
        self.prompt = "% "

    @staticmethod
    def _split_args(line):
        """Split ``line`` like a shell would; print an error and return None
        when it cannot be parsed (e.g. an unbalanced quote)."""
        try:
            return shlex.split(line)
        except ValueError as e:
            print("error: %s" % e)
            return None

    def default(self, line):
        """Catch all for unknown command, which will exit the interpreter."""
        print("unknown command: %s" % line)
        return True

    def do_q(self, line):
        """Quit command"""
        return True

    def do_quit(self, line):
        """Quit command"""
        return True

    def do_extract(self, line):
        """extract NAME [NAME ...]
        Save every object with a matching name, from any of the archives,
        into the current working directory. Existing files are not
        overwritten."""
        args = self._split_args(line)
        if args is None:
            return
        if not args:
            print("error: must specify the name of an object to extract")
            return
        for object_name in args:
            # Track success per name so that every name which matches
            # nothing is reported, not just the last one.
            extracted = False
            for archive in self.archives:
                for obj in archive.find(object_name):
                    obj.save(overwrite=False)
                    extracted = True
            if not extracted:
                print('error: no object matches "%s" in any archives' % (object_name))

    def do_ls(self, line):
        """ls [NAME ...]
        Without arguments list the contents of every archive. With
        arguments show the details of the objects with matching names."""
        args = self._split_args(line)
        if args is None:
            return
        if not args:
            for archive in self.archives:
                archive.dump(flat=True)
                print("")
            return
        for object_name in args:
            for archive in self.archives:
                matches = archive.find(object_name)
                if not matches:
                    print(
                        'error: no object matches "%s" in "%s"'
                        % (object_name, archive.path)
                    )
                    continue
                for obj in matches:
                    obj.dump(flat=False)
276                print("")
277
278
def _create_main_option_parser():
    """Build the option parser for the command-line tool."""
    parser = optparse.OptionParser(prog="bsd", description="Utility for BSD archives")
    parser.add_option(
        "--object",
        type="string",
        dest="object_name",
        default=None,
        help=(
            "Specify the name of a object within the BSD archive to get "
            "information on"
        ),
    )
    parser.add_option(
        "-s",
        "--symbol",
        type="string",
        dest="find_symbol",
        default=None,
        help=(
            "Specify the name of a symbol within the BSD archive to get "
            "information on from SYMDEF"
        ),
    )
    parser.add_option(
        "--symdef",
        action="store_true",
        dest="symdef",
        default=False,
        help=("Dump the information in the SYMDEF."),
    )
    parser.add_option(
        "-v",
        "--verbose",
        action="store_true",
        dest="verbose",
        default=False,
        help="Enable verbose output",
    )
    parser.add_option(
        "-e",
        "--extract",
        action="store_true",
        dest="extract",
        default=False,
        help=(
            "Specify this to extract the object specified with the --object "
            "option. There must be only one object with a matching name or "
            "the --mtime option must be specified to uniquely identify a "
            "single object."
        ),
    )
    parser.add_option(
        "-m",
        "--mtime",
        type="int",
        dest="mtime",
        default=None,
        help=(
            "Specify the modification time of the object an object. This "
            "option is used with either the --object or --extract options."
        ),
    )
    parser.add_option(
        "-o",
        "--outfile",
        type="string",
        dest="outfile",
        default=None,
        help=(
            "Specify a different name or path for the file to extract when "
            "using the --extract option. If this option isn't specified, "
            "then the extracted object file will be extracted into the "
            "current working directory if a file doesn't already exist "
            "with that name."
        ),
    )
    parser.add_option(
        "-i",
        "--interactive",
        action="store_true",
        dest="interactive",
        default=False,
        help=(
            "Enter an interactive shell that allows users to interactively "
            "explore contents of .a files."
        ),
    )
    return parser


def main():
    """Command-line entry point.

    Parses sys.argv and treats every positional argument as the path of an
    archive. With --interactive a prompt is started on all of them.
    Otherwise each archive is processed in turn according to the first of
    --object, --symbol and --symdef that was given, and is simply listed
    when none of them was.
    """
    parser = _create_main_option_parser()
    (options, args) = parser.parse_args(sys.argv[1:])

    if options.interactive:
        archives = [Archive(path) for path in args]
        interpreter = Interactive(archives)
        interpreter.cmdloop()
        return

    for path in args:
        archive = Archive(path)
        if options.object_name:
            print("%s:\n" % (path))
            matches = archive.find(options.object_name, options.mtime)
            if not matches:
                print('error: object "%s" not found in archive' % (options.object_name))
                continue
            dump_all = True
            if options.extract:
                if len(matches) == 1:
                    dump_all = False
                    matches[0].save(path=options.outfile, overwrite=False)
                else:
                    print(
                        'error: multiple objects match "%s". Specify '
                        "the modification time using --mtime."
                        % (options.object_name)
                    )
            if dump_all:
                for obj in matches:
                    obj.dump(flat=False)
        elif options.find_symbol:
            symdefs = archive.get_symdef()
            if not symdefs:
                print("error: no __.SYMDEF was found")
                continue
            success = False
            for name, offset in symdefs:
                if name != options.find_symbol:
                    continue
                print('Found "%s" in:' % (options.find_symbol))
                obj = archive.get_object_at_offset(offset)
                if obj is None:
                    # The symbol table can name an offset where no member
                    # was parsed; report it instead of crashing.
                    print("error: no object found at offset %#08.8x" % (offset))
                else:
                    obj.dump(flat=False)
                success = True
            if not success:
                print('Didn\'t find "%s" in any objects' % (options.find_symbol))
        elif options.symdef:
            for object_dict in archive.get_object_dicts():
                obj = object_dict["object"]
                if obj is None:
                    print("error: SYMDEF refers to an object that isn't in the archive")
                else:
                    obj.dump(flat=False)
                print("symbols:")
                for name in object_dict["symdefs"]:
                    print("  %s" % (name))
        else:
            archive.dump(flat=not options.verbose)
422
423
# Run the command-line tool when this file is executed as a script. Nothing
# happens here when the file is imported as a module.
if __name__ == "__main__":
    main()
426
427
def print_mtime_error(result, dmap_mtime, actual_mtime):
    """Write a modification-time mismatch error to ``result``.

    ``dmap_mtime`` is the time recorded in the debug map and
    ``actual_mtime`` the time of the object file; both are shown in hex.
    """
    message = (
        "error: modification time in debug map (%#08.8x) doesn't "
        "match the .o file modification time (%#08.8x)"
    )
    print(message % (dmap_mtime, actual_mtime), file=result)
434
435
def print_file_missing_error(result, path):
    """Write an error to ``result`` saying that ``path`` doesn't exist."""
    message = 'error: file "%s" doesn\'t exist' % (path)
    print(message, file=result)
438
439
def print_multiple_object_matches(result, object_name, mtime, matches):
    """Write an error to ``result`` for an ambiguous archive lookup.

    The error names ``object_name`` and ``mtime`` (in hex) and is followed
    by the column header and one flat line for every entry of ``matches``.
    """
    print(
        "error: multiple matches for object '%s' with "
        "modification time %#08.8x:" % (object_name, mtime),
        file=result,
    )
    Archive.dump_header(f=result)
    for match in matches:
        match.dump(f=result, flat=True)
449
450
def print_archive_object_error(result, object_name, mtime, archive):
    """Write an error to ``result`` for an archive lookup that found nothing.

    When ``archive`` holds objects named ``object_name`` they are listed as
    potential matches for the modification time ``mtime``. Otherwise every
    object of the archive is listed.
    """
    candidates = archive.find(object_name, f=result)
    if len(candidates) > 0:
        headline = (
            "error: no objects have a modification time that "
            "matches %#08.8x for '%s'. Potential matches:" % (mtime, object_name)
        )
        print(headline, file=result)
        Archive.dump_header(f=result)
        listing = candidates
    else:
        headline = 'error: no object named "%s" found in archive:' % (object_name)
        print(headline, file=result)
        Archive.dump_header(f=result)
        listing = archive.objects
    for entry in listing:
        entry.dump(f=result, flat=True)
469        # archive.dump(f=result, flat=True)
470
471
class VerifyDebugMapCommand:
    """LLDB command that checks the object files named in a debug map.

    For every object-file symbol of the given modules the command checks
    that the file (or the archive member, for paths of the form
    "archive(member)") exists and that its modification time equals the
    value recorded in the symbol.
    """

    name = "verify-debug-map-objects"

    def create_options(self):
        """Create ``self.parser``, the option parser of the command."""
        usage = "usage: %prog [options]"
        description = """This command reports any .o files that are missing
or whose modification times don't match in the debug map of an executable."""

        self.parser = optparse.OptionParser(
            description=description, prog=self.name, usage=usage, add_help_option=False
        )

        self.parser.add_option(
            "-e",
            "--errors",
            action="store_true",
            dest="errors",
            default=False,
            help="Only show errors",
        )

    def get_short_help(self):
        """Return the one-line description of the command."""
        return "Verify debug map object files."

    def get_long_help(self):
        """Return the full help text generated by the option parser."""
        return self.help_string

    def __init__(self, debugger, unused):
        self.create_options()
        self.help_string = self.parser.format_help()

    @staticmethod
    def _print_error_path(options, path, result):
        """In errors-only mode, put ``path`` in front of the error that
        follows (in normal mode the path was already printed on its own
        line)."""
        if options.errors:
            print("%s" % (path), end=" ", file=result)

    def __call__(self, debugger, command, exe_ctx, result):
        """Run the command.

        ``command`` holds the raw arguments: options followed by module
        specifications. Output and errors are written to ``result``.
        """
        import lldb

        try:
            # Use the Shell Lexer to properly parse up command options just
            # like a shell would
            command_args = shlex.split(command)
            (options, args) = self.parser.parse_args(command_args)
        except (Exception, SystemExit):
            # optparse reports bad options by exiting, which raises
            # SystemExit; that must not terminate the host process.
            result.SetError("option parsing failed")
            return

        # Always get program state from the SBExecutionContext passed in
        target = exe_ctx.GetTarget()
        if not target.IsValid():
            result.SetError("invalid target")
            return
        # Parsed archives, keyed by path, so each is only opened once.
        archives = {}
        for module_spec in args:
            module = target.module[module_spec]
            if not (module and module.IsValid()):
                result.SetError(
                    'error: invalid module specification: "%s". '
                    "Specify the full path, basename, or UUID of "
                    "a module " % (module_spec)
                )
                return
            num_symbols = module.GetNumSymbols()
            num_errors = 0
            for i in range(num_symbols):
                symbol = module.GetSymbolAtIndex(i)
                if symbol.GetType() != lldb.eSymbolTypeObjectFile:
                    continue
                path = symbol.GetName()
                if not path:
                    continue
                # Extract the value of the symbol by dumping the
                # symbol. The value is the mod time.
                dmap_mtime = int(str(symbol).split("value = ")[1].split(",")[0], 16)
                if not options.errors:
                    print("%s" % (path), file=result)
                if os.path.exists(path):
                    actual_mtime = int(os.stat(path).st_mtime)
                    if dmap_mtime != actual_mtime:
                        num_errors += 1
                        self._print_error_path(options, path, result)
                        print_mtime_error(result, dmap_mtime, actual_mtime)
                elif path[-1] == ")":
                    # "archive_path(object_name)": split on the last "(" so
                    # parentheses inside the archive path are tolerated.
                    (archive_path, _, object_name) = path[0:-1].rpartition("(")
                    if not archive_path or not object_name:
                        num_errors += 1
                        self._print_error_path(options, path, result)
                        print_file_missing_error(result, path)
                        continue
                    if not os.path.exists(archive_path):
                        num_errors += 1
                        self._print_error_path(options, path, result)
                        print_file_missing_error(result, archive_path)
                        continue
                    if archive_path in archives:
                        archive = archives[archive_path]
                    else:
                        archive = Archive(archive_path)
                        archives[archive_path] = archive
                    matches = archive.find(object_name, dmap_mtime)
                    num_matches = len(matches)
                    if num_matches == 1:
                        if not options.errors:
                            print("1 match", file=result)
                        obj = matches[0]
                        if obj.date != dmap_mtime:
                            num_errors += 1
                            self._print_error_path(options, path, result)
                            print_mtime_error(result, dmap_mtime, obj.date)
                    elif num_matches == 0:
                        num_errors += 1
                        self._print_error_path(options, path, result)
                        print_archive_object_error(
                            result, object_name, dmap_mtime, archive
                        )
                    elif num_matches > 1:
                        num_errors += 1
                        self._print_error_path(options, path, result)
                        print_multiple_object_matches(
                            result, object_name, dmap_mtime, matches
                        )
                else:
                    # A plain object file that is not on disk.
                    num_errors += 1
                    self._print_error_path(options, path, result)
                    print_file_missing_error(result, path)
            if num_errors > 0:
                print("%u errors found" % (num_errors), file=result)
            else:
                print("No errors detected in debug map", file=result)
598                print("No errors detected in debug map", file=result)
599
600
def __lldb_init_module(debugger, dict):
    """Register the commands of this module with ``debugger``.

    Adds a scripted command named after VerifyDebugMapCommand.name that is
    implemented by the VerifyDebugMapCommand class of this module, then
    prints a note about how to get help for it.
    """
    command_name = VerifyDebugMapCommand.name
    add_command = "command script add -o -c %s.VerifyDebugMapCommand %s" % (
        __name__,
        command_name,
    )
    debugger.HandleCommand(add_command)
    notice = (
        'The "%s" command has been installed, type "help %s" for detailed '
        "help." % (command_name, command_name)
    )
    print(notice)
612    )
613