xref: /llvm-project/lldb/examples/python/crashlog.py (revision 42df155ae628b4ae756a858bd09b105ee10b86eb)
1#!/usr/bin/env python3
2
3#----------------------------------------------------------------------
4# Be sure to add the python path that points to the LLDB shared library.
5#
6# To use this in the embedded python interpreter using "lldb":
7#
8#   cd /path/containing/crashlog.py
9#   lldb
10#   (lldb) script import crashlog
11#   "crashlog" command installed, type "crashlog --help" for detailed help
12#   (lldb) crashlog ~/Library/Logs/DiagnosticReports/a.crash
13#
14# The benefit of running the crashlog command inside lldb in the
15# embedded python interpreter is when the command completes, there
16# will be a target with all of the files loaded at the locations
17# described in the crash log. Only the files that have stack frames
18# in the backtrace will be loaded unless the "--load-all" option
19# has been specified. This allows users to explore the program in the
20# state it was in right at crash time.
21#
22# On MacOSX csh, tcsh:
23#   ( setenv PYTHONPATH /path/to/LLDB.framework/Resources/Python ; ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash )
24#
25# On MacOSX sh, bash:
26#   PYTHONPATH=/path/to/LLDB.framework/Resources/Python ./crashlog.py ~/Library/Logs/DiagnosticReports/a.crash
27#----------------------------------------------------------------------
28
29import abc
30import concurrent.futures
31import contextlib
32import datetime
33import json
34import optparse
35import os
36import platform
37import plistlib
38import re
39import shlex
40import string
41import subprocess
42import sys
43import threading
44import time
45import uuid
46
47
# Re-entrant lock taken around print() calls (see the `with print_lock:`
# blocks in this file) so that multi-line messages are emitted atomically.
print_lock = threading.RLock()

try:
    # First try for LLDB in case PYTHONPATH is already correctly setup.
    import lldb
except ImportError:
    # Ask the command line driver for the path to the lldb module. The child
    # process inherits this process's environment (subprocess default), so
    # variables such as SDKROOT are visible to xcrun.
    command = ['xcrun', 'lldb', '-P'] if platform.system() == 'Darwin' else ['lldb', '-P']
    try:
        lldb_python_path = subprocess.check_output(command).decode("utf-8").strip()
    except (OSError, subprocess.CalledProcessError):
        # The driver is missing or exited with an error. Fall through to the
        # second import attempt so the user gets the error message below
        # rather than a traceback.
        lldb_python_path = ''
    # Extend the module search path if the path exists and isn't already there.
    if lldb_python_path and os.path.exists(lldb_python_path) and lldb_python_path not in sys.path:
        sys.path.append(lldb_python_path)
    # Try importing LLDB again.
    try:
        import lldb
    except ImportError:
        print("error: couldn't locate the 'lldb' module, please set PYTHONPATH correctly")
        sys.exit(1)

from lldb.utils import symbolication
69
def read_plist(s):
    """Parse the property-list data `s` and return the decoded object.

    Uses `plistlib.loads` when running under Python 3 and the legacy
    `plistlib.readPlistFromString` otherwise.
    """
    running_py3 = sys.version_info.major == 3
    parse = plistlib.loads if running_py3 else plistlib.readPlistFromString
    return parse(s)
75
76class CrashLog(symbolication.Symbolicator):
77    class Thread:
78        """Class that represents a thread in a darwin crash log"""
79
80        def __init__(self, index, app_specific_backtrace):
81            self.index = index
82            self.id = index
83            self.frames = list()
84            self.idents = list()
85            self.registers = dict()
86            self.reason = None
87            self.name = None
88            self.queue = None
89            self.crashed = False
90            self.app_specific_backtrace = app_specific_backtrace
91
92        def dump(self, prefix):
93            if self.app_specific_backtrace:
94                print("%Application Specific Backtrace[%u] %s" % (prefix, self.index, self.reason))
95            else:
96                print("%sThread[%u] %s" % (prefix, self.index, self.reason))
97            if self.frames:
98                print("%s  Frames:" % (prefix))
99                for frame in self.frames:
100                    frame.dump(prefix + '    ')
101            if self.registers:
102                print("%s  Registers:" % (prefix))
103                for reg in self.registers.keys():
104                    print("%s    %-8s = %#16.16x" % (prefix, reg, self.registers[reg]))
105
106        def dump_symbolicated(self, crash_log, options):
107            this_thread_crashed = self.app_specific_backtrace
108            if not this_thread_crashed:
109                this_thread_crashed = self.did_crash()
110                if options.crashed_only and this_thread_crashed == False:
111                    return
112
113            print("%s" % self)
114            display_frame_idx = -1
115            for frame_idx, frame in enumerate(self.frames):
116                disassemble = (
117                    this_thread_crashed or options.disassemble_all_threads) and frame_idx < options.disassemble_depth
118
119                # Except for the zeroth frame, we should subtract 1 from every
120                # frame pc to get the previous line entry.
121                pc = frame.pc & crash_log.addr_mask
122                pc = pc if frame_idx == 0 or pc == 0 else pc - 1
123                symbolicated_frame_addresses = crash_log.symbolicate(pc, options.verbose)
124
125                if symbolicated_frame_addresses:
126                    symbolicated_frame_address_idx = 0
127                    for symbolicated_frame_address in symbolicated_frame_addresses:
128                        display_frame_idx += 1
129                        print('[%3u] %s' % (frame_idx, symbolicated_frame_address))
130                        if (options.source_all or self.did_crash(
131                        )) and display_frame_idx < options.source_frames and options.source_context:
132                            source_context = options.source_context
133                            line_entry = symbolicated_frame_address.get_symbol_context().line_entry
134                            if line_entry.IsValid():
135                                strm = lldb.SBStream()
136                                if line_entry:
137                                    crash_log.debugger.GetSourceManager().DisplaySourceLinesWithLineNumbers(
138                                        line_entry.file, line_entry.line, source_context, source_context, "->", strm)
139                                source_text = strm.GetData()
140                                if source_text:
141                                    # Indent the source a bit
142                                    indent_str = '    '
143                                    join_str = '\n' + indent_str
144                                    print('%s%s' % (indent_str, join_str.join(source_text.split('\n'))))
145                        if symbolicated_frame_address_idx == 0:
146                            if disassemble:
147                                instructions = symbolicated_frame_address.get_instructions()
148                                if instructions:
149                                    print()
150                                    symbolication.disassemble_instructions(
151                                        crash_log.get_target(),
152                                        instructions,
153                                        frame.pc,
154                                        options.disassemble_before,
155                                        options.disassemble_after,
156                                        frame.index > 0)
157                                    print()
158                        symbolicated_frame_address_idx += 1
159                else:
160                    print(frame)
161            if self.registers:
162                print()
163                for reg in self.registers.keys():
164                    print("    %-8s = %#16.16x" % (reg, self.registers[reg]))
165            elif self.crashed:
166               print()
167               print("No thread state (register information) available")
168
169        def add_ident(self, ident):
170            if ident not in self.idents:
171                self.idents.append(ident)
172
173        def did_crash(self):
174            return self.reason is not None
175
176        def __str__(self):
177            if self.app_specific_backtrace:
178                s = "Application Specific Backtrace[%u]" % self.index
179            else:
180                s = "Thread[%u]" % self.index
181            if self.reason:
182                s += ' %s' % self.reason
183            return s
184
185    class Frame:
186        """Class that represents a stack frame in a thread in a darwin crash log"""
187
188        def __init__(self, index, pc, description):
189            self.pc = pc
190            self.description = description
191            self.index = index
192
193        def __str__(self):
194            if self.description:
195                return "[%3u] 0x%16.16x %s" % (
196                    self.index, self.pc, self.description)
197            else:
198                return "[%3u] 0x%16.16x" % (self.index, self.pc)
199
200        def dump(self, prefix):
201            print("%s%s" % (prefix, str(self)))
202
203    class DarwinImage(symbolication.Image):
204        """Class that represents a binary images in a darwin crash log"""
205        dsymForUUIDBinary = '/usr/local/bin/dsymForUUID'
206        if not os.path.exists(dsymForUUIDBinary):
207            try:
208                dsymForUUIDBinary = subprocess.check_output('which dsymForUUID',
209                                                            shell=True).decode("utf-8").rstrip('\n')
210            except:
211                dsymForUUIDBinary = ""
212
213        dwarfdump_uuid_regex = re.compile(
214            'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*')
215
216        def __init__(
217                self,
218                text_addr_lo,
219                text_addr_hi,
220                identifier,
221                version,
222                uuid,
223                path,
224                verbose):
225            symbolication.Image.__init__(self, path, uuid)
226            self.add_section(
227                symbolication.Section(
228                    text_addr_lo,
229                    text_addr_hi,
230                    "__TEXT"))
231            self.identifier = identifier
232            self.version = version
233            self.verbose = verbose
234
235        def show_symbol_progress(self):
236            """
237            Hide progress output and errors from system frameworks as they are plentiful.
238            """
239            if self.verbose:
240                return True
241            return not (self.path.startswith("/System/Library/") or
242                        self.path.startswith("/usr/lib/"))
243
244
245        def find_matching_slice(self):
246            dwarfdump_cmd_output = subprocess.check_output(
247                'dwarfdump --uuid "%s"' % self.path, shell=True).decode("utf-8")
248            self_uuid = self.get_uuid()
249            for line in dwarfdump_cmd_output.splitlines():
250                match = self.dwarfdump_uuid_regex.search(line)
251                if match:
252                    dwarf_uuid_str = match.group(1)
253                    dwarf_uuid = uuid.UUID(dwarf_uuid_str)
254                    if self_uuid == dwarf_uuid:
255                        self.resolved_path = self.path
256                        self.arch = match.group(2)
257                        return True
258            if not self.resolved_path:
259                self.unavailable = True
260                if self.show_symbol_progress():
261                    print(("error\n    error: unable to locate '%s' with UUID %s"
262                           % (self.path, self.get_normalized_uuid_string())))
263                return False
264
265        def locate_module_and_debug_symbols(self):
266            # Don't load a module twice...
267            if self.resolved:
268                return True
269            # Mark this as resolved so we don't keep trying
270            self.resolved = True
271            uuid_str = self.get_normalized_uuid_string()
272            if self.show_symbol_progress():
273                with print_lock:
274                    print('Getting symbols for %s %s...' % (uuid_str, self.path))
275            if os.path.exists(self.dsymForUUIDBinary):
276                dsym_for_uuid_command = '%s %s' % (
277                    self.dsymForUUIDBinary, uuid_str)
278                s = subprocess.check_output(dsym_for_uuid_command, shell=True)
279                if s:
280                    try:
281                        plist_root = read_plist(s)
282                    except:
283                        with print_lock:
284                            print(("Got exception: ", sys.exc_info()[1], " handling dsymForUUID output: \n", s))
285                        raise
286                    if plist_root:
287                        plist = plist_root[uuid_str]
288                        if plist:
289                            if 'DBGArchitecture' in plist:
290                                self.arch = plist['DBGArchitecture']
291                            if 'DBGDSYMPath' in plist:
292                                self.symfile = os.path.realpath(
293                                    plist['DBGDSYMPath'])
294                            if 'DBGSymbolRichExecutable' in plist:
295                                self.path = os.path.expanduser(
296                                    plist['DBGSymbolRichExecutable'])
297                                self.resolved_path = self.path
298            if not self.resolved_path and os.path.exists(self.path):
299                if not self.find_matching_slice():
300                    return False
301            if not self.resolved_path and not os.path.exists(self.path):
302                try:
303                    mdfind_results = subprocess.check_output(
304                        ["/usr/bin/mdfind",
305                         "com_apple_xcode_dsym_uuids == %s" % uuid_str]).decode("utf-8").splitlines()
306                    found_matching_slice = False
307                    for dsym in mdfind_results:
308                        dwarf_dir = os.path.join(dsym, 'Contents/Resources/DWARF')
309                        if not os.path.exists(dwarf_dir):
310                            # Not a dSYM bundle, probably an Xcode archive.
311                            continue
312                        with print_lock:
313                            print('falling back to binary inside "%s"' % dsym)
314                        self.symfile = dsym
315                        for filename in os.listdir(dwarf_dir):
316                           self.path = os.path.join(dwarf_dir, filename)
317                           if self.find_matching_slice():
318                              found_matching_slice = True
319                              break
320                        if found_matching_slice:
321                           break
322                except:
323                    pass
324            if (self.resolved_path and os.path.exists(self.resolved_path)) or (
325                    self.path and os.path.exists(self.path)):
326                with print_lock:
327                    print('Resolved symbols for %s %s...' % (uuid_str, self.path))
328                return True
329            else:
330                self.unavailable = True
331            return False
332
333    def __init__(self, debugger, path, verbose):
334        """CrashLog constructor that take a path to a darwin crash log file"""
335        symbolication.Symbolicator.__init__(self, debugger)
336        self.path = os.path.expanduser(path)
337        self.info_lines = list()
338        self.system_profile = list()
339        self.threads = list()
340        self.backtraces = list()  # For application specific backtraces
341        self.idents = list()  # A list of the required identifiers for doing all stack backtraces
342        self.errors = list()
343        self.exception = dict()
344        self.crashed_thread_idx = -1
345        self.version = -1
346        self.target = None
347        self.verbose = verbose
348
349    def dump(self):
350        print("Crash Log File: %s" % (self.path))
351        if self.backtraces:
352            print("\nApplication Specific Backtraces:")
353            for thread in self.backtraces:
354                thread.dump('  ')
355        print("\nThreads:")
356        for thread in self.threads:
357            thread.dump('  ')
358        print("\nImages:")
359        for image in self.images:
360            image.dump('  ')
361
362    def set_main_image(self, identifier):
363        for i, image in enumerate(self.images):
364            if image.identifier == identifier:
365                self.images.insert(0, self.images.pop(i))
366                break
367
368    def find_image_with_identifier(self, identifier):
369        for image in self.images:
370            if image.identifier == identifier:
371                return image
372        regex_text = '^.*\.%s$' % (re.escape(identifier))
373        regex = re.compile(regex_text)
374        for image in self.images:
375            if regex.match(image.identifier):
376                return image
377        return None
378
379    def create_target(self):
380        if self.target is None:
381            self.target = symbolication.Symbolicator.create_target(self)
382            if self.target:
383                return self.target
384            # We weren't able to open the main executable as, but we can still
385            # symbolicate
386            print('crashlog.create_target()...2')
387            if self.idents:
388                for ident in self.idents:
389                    image = self.find_image_with_identifier(ident)
390                    if image:
391                        self.target = image.create_target(self.debugger)
392                        if self.target:
393                            return self.target  # success
394            print('crashlog.create_target()...3')
395            for image in self.images:
396                self.target = image.create_target(self.debugger)
397                if self.target:
398                    return self.target  # success
399            print('crashlog.create_target()...4')
400            print('error: Unable to locate any executables from the crash log.')
401            print('       Try loading the executable into lldb before running crashlog')
402            print('       and/or make sure the .dSYM bundles can be found by Spotlight.')
403        return self.target
404
    def get_target(self):
        """Return `self.target` as it currently is, without trying to create it."""
        return self.target
407
408
class CrashLogFormatException(Exception):
    """Error type for crash log format problems.

    NOTE(review): no raise site is visible in this part of the file; confirm
    the intended use against the callers.
    """
411
412
class CrashLogParseException(Exception):
    """Raised by JSONCrashLogParser.parse when the JSON data cannot be parsed."""
415
class InteractiveCrashLogException(Exception):
    """Error type for interactive crash log handling.

    NOTE(review): no raise site is visible in this part of the file; confirm
    the intended use against the callers.
    """
418
class CrashLogParser:
    """CrashLog parser base class and factory.

    Instantiating this class yields a JSONCrashLogParser when the file at
    `path` holds (non-empty) JSON, and a TextCrashLogParser otherwise.
    """

    def __new__(cls, debugger, path, verbose):
        """Pick the concrete parser class from the content of `path`."""
        json_data = JSONCrashLogParser.is_valid_json(path)
        if not json_data:
            return object.__new__(TextCrashLogParser)
        # Keep the decoded JSON so the file does not have to be read again.
        parser = object.__new__(JSONCrashLogParser)
        parser.data = json_data
        return parser

    def __init__(self, debugger, path, verbose):
        """Remember the expanded path and create the CrashLog to populate."""
        expanded_path = os.path.expanduser(path)
        self.path = expanded_path
        self.verbose = verbose
        self.crashlog = CrashLog(debugger, expanded_path, verbose)

    @abc.abstractmethod
    def parse(self):
        """Parse the crash log; implemented by the concrete parsers."""
        pass
438
439
440class JSONCrashLogParser(CrashLogParser):
441    @staticmethod
442    def is_valid_json(path):
443        def parse_json(buffer):
444            try:
445                return json.loads(buffer)
446            except:
447                # The first line can contain meta data. Try stripping it and
448                # try again.
449                head, _, tail = buffer.partition('\n')
450                return json.loads(tail)
451
452        with open(path, 'r', encoding='utf-8') as f:
453            buffer = f.read()
454        try:
455            return parse_json(buffer)
456        except:
457            return None
458
459    def parse(self):
460        try:
461            self.parse_process_info(self.data)
462            self.parse_images(self.data['usedImages'])
463            self.parse_main_image(self.data)
464            self.parse_threads(self.data['threads'])
465            self.parse_errors(self.data)
466            thread = self.crashlog.threads[self.crashlog.crashed_thread_idx]
467            reason = self.parse_crash_reason(self.data['exception'])
468            if thread.reason:
469                thread.reason = '{} {}'.format(thread.reason, reason)
470            else:
471                thread.reason = reason
472        except (KeyError, ValueError, TypeError) as e:
473            raise CrashLogParseException(
474                'Failed to parse JSON crashlog: {}: {}'.format(
475                    type(e).__name__, e))
476
477        return self.crashlog
478
479    def get_used_image(self, idx):
480        return self.data['usedImages'][idx]
481
482    def parse_process_info(self, json_data):
483        self.crashlog.process_id = json_data['pid']
484        self.crashlog.process_identifier = json_data['procName']
485
486    def parse_crash_reason(self, json_exception):
487        self.crashlog.exception = json_exception
488        exception_type = json_exception['type']
489        exception_signal = " "
490        if 'signal' in json_exception:
491            exception_signal += "({})".format(json_exception['signal'])
492
493        if 'codes' in json_exception:
494            exception_extra = " ({})".format(json_exception['codes'])
495        elif 'subtype' in json_exception:
496            exception_extra = " ({})".format(json_exception['subtype'])
497        else:
498            exception_extra = ""
499        return "{}{}{}".format(exception_type, exception_signal,
500                                  exception_extra)
501
502    def parse_images(self, json_images):
503        idx = 0
504        for json_image in json_images:
505            img_uuid = uuid.UUID(json_image['uuid'])
506            low = int(json_image['base'])
507            high = int(0)
508            name = json_image['name'] if 'name' in json_image else ''
509            path = json_image['path'] if 'path' in json_image else ''
510            version = ''
511            darwin_image = self.crashlog.DarwinImage(low, high, name, version,
512                                                     img_uuid, path,
513                                                     self.verbose)
514            self.crashlog.images.append(darwin_image)
515            idx += 1
516
517    def parse_main_image(self, json_data):
518        if 'procName' in json_data:
519            proc_name = json_data['procName']
520            self.crashlog.set_main_image(proc_name)
521
522    def parse_frames(self, thread, json_frames):
523        idx = 0
524        for json_frame in json_frames:
525            image_id = int(json_frame['imageIndex'])
526            json_image = self.get_used_image(image_id)
527            ident = json_image['name'] if 'name' in json_image else ''
528            thread.add_ident(ident)
529            if ident not in self.crashlog.idents:
530                self.crashlog.idents.append(ident)
531
532            frame_offset = int(json_frame['imageOffset'])
533            image_addr = self.get_used_image(image_id)['base']
534            pc = image_addr + frame_offset
535            thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))
536
537            # on arm64 systems, if it jump through a null function pointer,
538            # we end up at address 0 and the crash reporter unwinder
539            # misses the frame that actually faulted.
540            # But $lr can tell us where the last BL/BLR instruction used
541            # was at, so insert that address as the caller stack frame.
542            if idx == 0 and pc == 0 and "lr" in thread.registers:
543                pc = thread.registers["lr"]
544                for image in self.data['usedImages']:
545                    text_lo = image['base']
546                    text_hi = text_lo + image['size']
547                    if text_lo <= pc < text_hi:
548                      idx += 1
549                      frame_offset = pc - text_lo
550                      thread.frames.append(self.crashlog.Frame(idx, pc, frame_offset))
551                      break
552
553            idx += 1
554
555    def parse_threads(self, json_threads):
556        idx = 0
557        for json_thread in json_threads:
558            thread = self.crashlog.Thread(idx, False)
559            if 'name' in json_thread:
560                thread.name = json_thread['name']
561                thread.reason = json_thread['name']
562            if 'id' in json_thread:
563                thread.id = int(json_thread['id'])
564            if json_thread.get('triggered', False):
565                self.crashlog.crashed_thread_idx = idx
566                thread.crashed = True
567                if 'threadState' in json_thread:
568                    thread.registers = self.parse_thread_registers(
569                        json_thread['threadState'])
570            if 'queue' in json_thread:
571                thread.queue = json_thread.get('queue')
572            self.parse_frames(thread, json_thread.get('frames', []))
573            self.crashlog.threads.append(thread)
574            idx += 1
575
576    def parse_thread_registers(self, json_thread_state, prefix=None):
577        registers = dict()
578        for key, state in json_thread_state.items():
579            if key == "rosetta":
580                registers.update(self.parse_thread_registers(state))
581                continue
582            if key == "x":
583                gpr_dict = { str(idx) : reg for idx,reg in enumerate(state) }
584                registers.update(self.parse_thread_registers(gpr_dict, key))
585                continue
586            try:
587                value = int(state['value'])
588                registers["{}{}".format(prefix or '',key)] = value
589            except (KeyError, ValueError, TypeError):
590                pass
591        return registers
592
593    def parse_errors(self, json_data):
594       if 'reportNotes' in json_data:
595          self.crashlog.errors = json_data['reportNotes']
596
597
class CrashLogParseMode:
    """Integer constants naming the section TextCrashLogParser is reading."""
    NORMAL, THREAD, IMAGES, THREGS, SYSTEM, INSTRS = range(6)
605
606class TextCrashLogParser(CrashLogParser):
    # "Parent Process:" line; group 1 is the text before the brackets and
    # group 2 the number inside them.
    parent_process_regex = re.compile(r'^Parent Process:\s*(.*)\[(\d+)\]')
    # Section headers starting with "Thread <n> crashed with" and
    # "Thread <n> instruction stream".
    thread_state_regex = re.compile(r'^Thread \d+ crashed with')
    thread_instrs_regex = re.compile(r'^Thread \d+ instruction stream')
    # Backtrace headers; group 1 is the thread / backtrace number.
    thread_regex = re.compile(r'^Thread (\d+).*:')
    app_backtrace_regex = re.compile(r'^Application Specific Backtrace (\d+).*:')
    # Pattern fragment spliced into the two regexes below: either a
    # parenthesized string or a name starting with "arm" / "x86_".
    # NOTE(review): the fragment is a bare alternation, so in frame_regex the
    # trailing r'\s+' binds only to its second alternative -- confirm that
    # this is intended.
    version = r'\(.+\)|(?:arm|x86_)[0-9a-z]+'
    # One stack frame line of a backtrace.
    frame_regex = re.compile(r'^(\d+)\s+'              # id
                             r'(.+?)\s+'               # img_name
                             r'(?:' +version+ r'\s+)?' # img_version
                             r'(0x[0-9a-fA-F]{4,})'    # addr (4 chars or more)
                             r' +(.*)'                 # offs
                            )
    # Frame line whose image is "???" and whose address is all zeroes.
    null_frame_regex = re.compile(r'^\d+\s+\?\?\?\s+0{4,} +')
    # One image line: address range, name, optional version, optional
    # <uuid>, then a path (or a run of "?").
    image_regex_uuid = re.compile(r'(0x[0-9a-fA-F]+)'          # img_lo
                                  r'\s+-\s+'                   #   -
                                  r'(0x[0-9a-fA-F]+)\s+'       # img_hi
                                  r'[+]?(.+?)\s+'              # img_name
                                  r'(?:(' +version+ r')\s+)?'  # img_version
                                  r'(?:<([-0-9a-fA-F]+)>\s+)?' # img_uuid
                                  r'(\?+|/.*)'                 # img_path
                                 )
    # "Exception Type:" line; group 1 is the EXC_* name, group 2 the optional
    # parenthesized text after it (stored as the signal by parse_exception).
    exception_type_regex = re.compile(r'^Exception Type:\s+(EXC_[A-Z_]+)(?:\s+\((.*)\))?')
    # "Exception Codes:" line with two comma separated hexadecimal values.
    exception_codes_regex = re.compile(r'^Exception Codes:\s+(0x[0-9a-fA-F]+),\s*(0x[0-9a-fA-F]+)')
    # Any other "Exception <something>:" line; group 1 is the text after the colon.
    exception_extra_regex = re.compile(r'^Exception\s+.*:\s+(.*)')
631
632    def __init__(self, debugger, path, verbose):
633        super().__init__(debugger, path, verbose)
634        self.thread = None
635        self.app_specific_backtrace = False
636        self.parse_mode = CrashLogParseMode.NORMAL
637        self.parsers = {
638            CrashLogParseMode.NORMAL : self.parse_normal,
639            CrashLogParseMode.THREAD : self.parse_thread,
640            CrashLogParseMode.IMAGES : self.parse_images,
641            CrashLogParseMode.THREGS : self.parse_thread_registers,
642            CrashLogParseMode.SYSTEM : self.parse_system,
643            CrashLogParseMode.INSTRS : self.parse_instructions,
644        }
645
646    def parse(self):
647        with open(self.path,'r', encoding='utf-8') as f:
648            lines = f.read().splitlines()
649
650        for line in lines:
651            line_len = len(line)
652            if line_len == 0:
653                if self.thread:
654                    if self.parse_mode == CrashLogParseMode.THREAD:
655                        if self.thread.index == self.crashlog.crashed_thread_idx:
656                            self.thread.reason = ''
657                            if hasattr(self.crashlog, 'thread_exception'):
658                                self.thread.reason += self.crashlog.thread_exception
659                            if hasattr(self.crashlog, 'thread_exception_data'):
660                                self.thread.reason += " (%s)" % self.crashlog.thread_exception_data
661                        if self.app_specific_backtrace:
662                            self.crashlog.backtraces.append(self.thread)
663                        else:
664                            self.crashlog.threads.append(self.thread)
665                    self.thread = None
666                else:
667                    # only append an extra empty line if the previous line
668                    # in the info_lines wasn't empty
669                    if len(self.crashlog.info_lines) > 0 and len(self.crashlog.info_lines[-1]):
670                        self.crashlog.info_lines.append(line)
671                self.parse_mode = CrashLogParseMode.NORMAL
672            else:
673                self.parsers[self.parse_mode](line)
674
675        return self.crashlog
676
677    def parse_exception(self, line):
678        if not line.startswith('Exception'):
679            return
680        if line.startswith('Exception Type:'):
681            self.crashlog.thread_exception = line[15:].strip()
682            exception_type_match = self.exception_type_regex.search(line)
683            if exception_type_match:
684                exc_type, exc_signal = exception_type_match.groups()
685                self.crashlog.exception['type'] = exc_type
686                if exc_signal:
687                    self.crashlog.exception['signal'] = exc_signal
688        elif line.startswith('Exception Subtype:'):
689            self.crashlog.thread_exception_subtype = line[18:].strip()
690            if 'type' in self.crashlog.exception:
691                self.crashlog.exception['subtype'] = self.crashlog.thread_exception_subtype
692        elif line.startswith('Exception Codes:'):
693            self.crashlog.thread_exception_data = line[16:].strip()
694            if 'type' not in self.crashlog.exception:
695                return
696            exception_codes_match = self.exception_codes_regex.search(line)
697            if exception_codes_match:
698                self.crashlog.exception['codes'] = self.crashlog.thread_exception_data
699                code, subcode = exception_codes_match.groups()
700                self.crashlog.exception['rawCodes'] = [int(code, base=16),
701                                                       int(subcode, base=16)]
702        else:
703            if 'type' not in self.crashlog.exception:
704                return
705            exception_extra_match = self.exception_extra_regex.search(line)
706            if exception_extra_match:
707                self.crashlog.exception['message'] = exception_extra_match.group(1)
708
709    def parse_normal(self, line):
710        if line.startswith('Process:'):
711            (self.crashlog.process_name, pid_with_brackets) = line[
712                8:].strip().split(' [')
713            self.crashlog.process_id = pid_with_brackets.strip('[]')
714        elif line.startswith('Identifier:'):
715            self.crashlog.process_identifier = line[11:].strip()
716        elif line.startswith('Version:'):
717            version_string = line[8:].strip()
718            matched_pair = re.search("(.+)\((.+)\)", version_string)
719            if matched_pair:
720                self.crashlog.process_version = matched_pair.group(1)
721                self.crashlog.process_compatability_version = matched_pair.group(
722                    2)
723            else:
724                self.crashlog.process = version_string
725                self.crashlog.process_compatability_version = version_string
726        elif self.parent_process_regex.search(line):
727            parent_process_match = self.parent_process_regex.search(
728                line)
729            self.crashlog.parent_process_name = parent_process_match.group(1)
730            self.crashlog.parent_process_id = parent_process_match.group(2)
731        elif line.startswith('Exception'):
732            self.parse_exception(line)
733            return
734        elif line.startswith('Crashed Thread:'):
735            self.crashlog.crashed_thread_idx = int(line[15:].strip().split()[0])
736            return
737        elif line.startswith('Triggered by Thread:'): # iOS
738            self.crashlog.crashed_thread_idx = int(line[20:].strip().split()[0])
739            return
740        elif line.startswith('Report Version:'):
741            self.crashlog.version = int(line[15:].strip())
742            return
743        elif line.startswith('System Profile:'):
744            self.parse_mode = CrashLogParseMode.SYSTEM
745            return
746        elif (line.startswith('Interval Since Last Report:') or
747                line.startswith('Crashes Since Last Report:') or
748                line.startswith('Per-App Interval Since Last Report:') or
749                line.startswith('Per-App Crashes Since Last Report:') or
750                line.startswith('Sleep/Wake UUID:') or
751                line.startswith('Anonymous UUID:')):
752            # ignore these
753            return
754        elif line.startswith('Thread'):
755            thread_state_match = self.thread_state_regex.search(line)
756            if thread_state_match:
757                self.app_specific_backtrace = False
758                thread_state_match = self.thread_regex.search(line)
759                thread_idx = int(thread_state_match.group(1))
760                self.parse_mode = CrashLogParseMode.THREGS
761                self.thread = self.crashlog.threads[thread_idx]
762                return
763            thread_insts_match  = self.thread_instrs_regex.search(line)
764            if thread_insts_match:
765                self.parse_mode = CrashLogParseMode.INSTRS
766                return
767            thread_match = self.thread_regex.search(line)
768            if thread_match:
769                self.app_specific_backtrace = False
770                self.parse_mode = CrashLogParseMode.THREAD
771                thread_idx = int(thread_match.group(1))
772                self.thread = self.crashlog.Thread(thread_idx, False)
773                return
774            return
775        elif line.startswith('Binary Images:'):
776            self.parse_mode = CrashLogParseMode.IMAGES
777            return
778        elif line.startswith('Application Specific Backtrace'):
779            app_backtrace_match = self.app_backtrace_regex.search(line)
780            if app_backtrace_match:
781                self.parse_mode = CrashLogParseMode.THREAD
782                self.app_specific_backtrace = True
783                idx = int(app_backtrace_match.group(1))
784                self.thread = self.crashlog.Thread(idx, True)
785        elif line.startswith('Last Exception Backtrace:'): # iOS
786            self.parse_mode = CrashLogParseMode.THREAD
787            self.app_specific_backtrace = True
788            idx = 1
789            self.thread = self.crashlog.Thread(idx, True)
790        self.crashlog.info_lines.append(line.strip())
791
792    def parse_thread(self, line):
793        if line.startswith('Thread'):
794            return
795        if self.null_frame_regex.search(line):
796            print('warning: thread parser ignored null-frame: "%s"' % line)
797            return
798        frame_match = self.frame_regex.search(line)
799        if frame_match:
800            (frame_id, frame_img_name, frame_addr,
801                frame_ofs) = frame_match.groups()
802            ident = frame_img_name
803            self.thread.add_ident(ident)
804            if ident not in self.crashlog.idents:
805                self.crashlog.idents.append(ident)
806            self.thread.frames.append(self.crashlog.Frame(int(frame_id), int(
807                frame_addr, 0), frame_ofs))
808        else:
809            print('error: frame regex failed for line: "%s"' % line)
810
811    def parse_images(self, line):
812        image_match = self.image_regex_uuid.search(line)
813        if image_match:
814            (img_lo, img_hi, img_name, img_version,
815                img_uuid, img_path) = image_match.groups()
816            image = self.crashlog.DarwinImage(int(img_lo, 0), int(img_hi, 0),
817                                            img_name.strip(),
818                                            img_version.strip()
819                                            if img_version else "",
820                                            uuid.UUID(img_uuid), img_path,
821                                            self.verbose)
822            self.crashlog.images.append(image)
823        else:
824            print("error: image regex failed for: %s" % line)
825
826
827    def parse_thread_registers(self, line):
828        # "r12: 0x00007fff6b5939c8  r13: 0x0000000007000006  r14: 0x0000000000002a03  r15: 0x0000000000000c00"
829        reg_values = re.findall('([a-z0-9]+): (0x[0-9a-f]+)', line, re.I)
830        for reg, value in reg_values:
831            self.thread.registers[reg] = int(value, 16)
832
833    def parse_system(self, line):
834        self.crashlog.system_profile.append(line)
835
836    def parse_instructions(self, line):
837        pass
838
839
def usage():
    """Print the one-line usage summary and exit with status 0."""
    summary = "Usage: lldb-symbolicate.py [-n name] executable-image"
    print(summary)
    raise SystemExit(0)
843
844
def save_crashlog(debugger, command, exe_ctx, result, dict):
    """Export the state of the current target into a crashlog-style file.

    This is an lldb command function: ``command`` holds the raw argument
    string, which must contain exactly one output path (plus an optional
    -v/--verbose flag).

    Args:
        debugger: the debugger the command runs in (unused here).
        command: raw command arguments as a single string.
        exe_ctx: execution context providing ``target`` and ``process``.
        result: command return object; error messages are reported through
            ``result.PutCString``.
        dict: the script interpreter's dictionary (unused here).

    The target is validated before the output file is opened, so an invalid
    target no longer leaves an empty file behind, and the file is closed on
    every exit path.
    """
    usage = "usage: %prog [options] <output-path>"
    description = '''Export the state of current target into a crashlog file'''
    parser = optparse.OptionParser(
        description=description,
        prog='save_crashlog',
        usage=usage)
    parser.add_option(
        '-v',
        '--verbose',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    try:
        (options, args) = parser.parse_args(shlex.split(command))
    except (SystemExit, ValueError):
        # optparse reports bad options (and handles --help) by calling
        # sys.exit(); shlex raises ValueError on unbalanced quotes. Neither
        # may escape into the hosting lldb session.
        result.PutCString("error: invalid options")
        return
    if len(args) != 1:
        result.PutCString(
            "error: invalid arguments, a single output file is the only valid argument")
        return

    target = exe_ctx.target
    if not target:
        result.PutCString("error: invalid target")
        return

    try:
        out_file = open(args[0], 'w', encoding='utf-8')
    except OSError:
        result.PutCString(
            "error: failed to open file '%s' for writing..." % args[0])
        return

    with out_file:
        identifier = target.executable.basename
        process = exe_ctx.process
        if process:
            pid = process.id
            if pid != lldb.LLDB_INVALID_PROCESS_ID:
                out_file.write(
                    'Process:         %s [%u]\n' %
                    (identifier, pid))
        out_file.write('Path:            %s\n' % (target.executable.fullpath))
        out_file.write('Identifier:      %s\n' % (identifier))
        out_file.write('\nDate/Time:       %s\n' %
                       (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        try:
            # Strip the trailing newline so it does not end up inside the
            # parentheses of the "OS Version" line.
            os_build = subprocess.check_output(
                ['sysctl', '-n', 'kern.osversion']).decode("utf-8").strip()
        except (OSError, subprocess.CalledProcessError):
            os_build = '???'
        out_file.write(
            'OS Version:      Mac OS X %s (%s)\n' %
            (platform.mac_ver()[0], os_build))
        out_file.write('Report Version:  9\n')

        num_threads = process.num_threads if process else 0
        for thread_idx in range(num_threads):
            thread = process.thread[thread_idx]
            out_file.write('\nThread %u:\n' % (thread_idx))
            for (frame_idx, frame) in enumerate(thread.frames):
                frame_pc = frame.pc
                # Offset of the PC from the start of the enclosing block,
                # function or symbol, whichever is available.
                frame_offset = 0
                if frame.function:
                    block = frame.GetFrameBlock()
                    block_range = block.range[frame.addr]
                    if block_range:
                        block_start_addr = block_range[0]
                        frame_offset = frame_pc - block_start_addr.GetLoadAddress(target)
                    else:
                        frame_offset = frame_pc - frame.function.addr.GetLoadAddress(target)
                elif frame.symbol:
                    frame_offset = frame_pc - frame.symbol.addr.GetLoadAddress(target)
                out_file.write(
                    '%-3u %-32s 0x%16.16x %s' %
                    (frame_idx, frame.module.file.basename, frame_pc, frame.name))
                if frame_offset > 0:
                    out_file.write(' + %u' % (frame_offset))
                line_entry = frame.line_entry
                if line_entry:
                    if options.verbose:
                        # This will output the fullpath + line + column
                        out_file.write(' %s' % (line_entry))
                    else:
                        out_file.write(
                            ' %s:%u' %
                            (line_entry.file.basename, line_entry.line))
                        column = line_entry.column
                        if column:
                            out_file.write(':%u' % (column))
                out_file.write('\n')

        out_file.write('\nBinary Images:\n')
        for module in target.modules:
            text_segment = module.section['__TEXT']
            if not text_segment:
                continue
            text_segment_load_addr = text_segment.GetLoadAddress(target)
            if text_segment_load_addr == lldb.LLDB_INVALID_ADDRESS:
                continue
            text_segment_end_load_addr = text_segment_load_addr + text_segment.size
            identifier = module.file.basename
            module_version = '???'
            module_version_array = module.GetVersion()
            if module_version_array:
                module_version = '.'.join(
                    map(str, module_version_array))
            out_file.write(
                '    0x%16.16x - 0x%16.16x  %s (%s - ???) <%s> %s\n' %
                (text_segment_load_addr,
                 text_segment_end_load_addr,
                 identifier,
                 module_version,
                 module.GetUUIDString(),
                 module.file.fullpath))
951
952
class Symbolicate:
    """Command object that lldb instantiates for the "crashlog" command."""

    def __init__(self, debugger, internal_dict):
        """Accept the arguments lldb passes in; no state is kept."""

    def __call__(self, debugger, command, exe_ctx, result):
        """Split the raw command string and symbolicate the named logs."""
        command_args = shlex.split(command)
        SymbolicateCrashLogs(debugger, command_args, result)

    def get_short_help(self):
        """Return the one-line help text for the command."""
        short_help = "Symbolicate one or more darwin crash log files."
        return short_help

    def get_long_help(self):
        """Return the full option help produced by the option parser."""
        return CrashLogOptionParser().format_help()
966
967
def SymbolicateCrashLog(crash_log, options):
    """Symbolicate a parsed crash log and print the result.

    Creates a target for the crash log, loads either all images or only the
    images referenced by the (crashed) threads' frames, then dumps the
    backtraces, the threads and any errors collected on the crash log.
    Returns early when the log has no images or no target can be created.
    """
    if options.debug:
        crash_log.dump()
    if not crash_log.images:
        print('error: no images in crash log')
        return

    if options.dump_image_list:
        print("Binary Images:")
        for image in crash_log.images:
            print(image.debug_dump() if options.verbose else image)

    target = crash_log.create_target()
    if not target:
        return
    # The first module is looked up here but its value is not used below.
    target.GetModuleAtIndex(0)

    if options.load_all_images:
        # --load-all option was specified, load everything up
        images_to_load = list(crash_log.images)
    else:
        if options.crashed_only:
            # Only the identifiers seen in frames of crashed threads;
            # evaluated lazily, thread by thread.
            wanted_idents = (ident
                             for thread in crash_log.threads
                             if thread.did_crash()
                             for ident in thread.idents)
        else:
            wanted_idents = crash_log.idents
        images_to_load = []
        for ident in wanted_idents:
            matching_images = crash_log.find_images_with_identifier(ident)
            if matching_images:
                images_to_load.extend(matching_images)
            else:
                print('error: can\'t find image for identifier "%s"' % ident)

    def load_image(image):
        # Pair the image with add_module's result so the caller can tell
        # which image an error belongs to.
        return image, image.add_module(target)

    loaded_images = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        pending = [executor.submit(load_image, image)
                   for image in images_to_load]
        for finished in concurrent.futures.as_completed(pending):
            image, err = finished.result()
            if err:
                print(err)
            else:
                loaded_images.append(image)

    if crash_log.backtraces:
        for thread in crash_log.backtraces:
            thread.dump_symbolicated(crash_log, options)
            print()

    for thread in crash_log.threads:
        thread.dump_symbolicated(crash_log, options)
        print()

    if crash_log.errors:
        print("Errors:")
        for error in crash_log.errors:
            print(error)
1042
def load_crashlog_in_scripted_process(debugger, crash_log_file, options, result):
    """Parse a crash log and launch it as a ScriptedProcess in a target.

    The target is, in order of preference: the one created from
    ``options.target_path``, the one created from the parsed crash log, or
    the debugger's first existing target. Unless ``options.skip_status`` is
    set, "process status" and "thread backtrace" are run afterwards.

    Raises:
        InteractiveCrashLogException: when the crash log file is missing, no
            target or command interpreter is available, the scripted process
            module cannot be imported, or the launch fails.
    """
    crashlog_path = os.path.expanduser(crash_log_file)
    if not os.path.exists(crashlog_path):
        raise InteractiveCrashLogException("crashlog file %s does not exist" % crashlog_path)

    parsed_log = CrashLogParser(debugger, crashlog_path, False).parse()

    target = lldb.SBTarget()
    # 1. Try to use the user-provided target
    if options.target_path:
        target = debugger.CreateTarget(options.target_path)
        if not target:
            raise InteractiveCrashLogException("couldn't create target provided by the user (%s)" % options.target_path)

    # 2. If the user didn't provide a target, try to create a target using the symbolicator
    if not target or not target.IsValid():
        target = parsed_log.create_target()
    # 3. If that didn't work, and a target is already loaded, use it
    if (target is None or not target.IsValid()) and debugger.GetNumTargets() > 0:
        target = debugger.GetTargetAtIndex(0)
    # 4. Fail
    if target is None or not target.IsValid():
        raise InteractiveCrashLogException("couldn't create target")

    interpreter = debugger.GetCommandInterpreter()
    if not interpreter:
        raise InteractiveCrashLogException("couldn't get command interpreter")

    interpreter.HandleCommand('script from lldb.macosx import crashlog_scripted_process', result)
    if not result.Succeeded():
        raise InteractiveCrashLogException("couldn't import crashlog scripted process module")

    # Arguments handed to the scripted process, serialized as JSON.
    process_arguments = {"crashlog_path": crashlog_path,
                         "load_all_images": options.load_all_images}
    structured_arguments = lldb.SBStructuredData()
    structured_arguments.SetFromJSON(json.dumps(process_arguments))

    launch_info = lldb.SBLaunchInfo(None)
    launch_info.SetProcessPluginName("ScriptedProcess")
    launch_info.SetScriptedProcessClassName("crashlog_scripted_process.CrashLogScriptedProcess")
    launch_info.SetScriptedProcessDictionary(structured_arguments)
    launch_error = lldb.SBError()
    process = target.Launch(launch_info, launch_error)

    if not process or launch_error.Fail():
        raise InteractiveCrashLogException("couldn't launch Scripted Process", launch_error)

    if options.skip_status:
        return

    @contextlib.contextmanager
    def synchronous(debugger):
        # Switch the debugger to synchronous mode for the duration of the
        # block and restore the previous setting afterwards.
        previous_async = debugger.GetAsync()
        debugger.SetAsync(False)
        try:
            yield
        finally:
            debugger.SetAsync(previous_async)

    with synchronous(debugger):
        run_options = lldb.SBCommandInterpreterRunOptions()
        run_options.SetStopOnError(True)
        run_options.SetStopOnCrash(True)
        run_options.SetEchoCommands(True)

        status_commands = lldb.SBStream()
        status_commands.Print("process status\n")
        status_commands.Print("thread backtrace\n")
        input_error = debugger.SetInputString(status_commands.GetData())
        if input_error.Success():
            debugger.RunCommandInterpreter(True, False, run_options, 0, False, True)
1110
def CreateSymbolicateCrashLogOptions(
        command_name,
        description,
        add_interactive_options):
    """Build the optparse parser for the crash log symbolication command.

    Args:
        command_name: program name shown in usage and help output (it
            replaces ``%prog`` in the usage string).
        description: description text shown at the top of the help output.
        add_interactive_options: when true, also add the options that
            control interactive mode (--interactive, --batch, --target and
            --skip-status).

    Returns:
        The configured ``optparse.OptionParser``.
    """
    usage = "usage: %prog [options] <FILE> [FILE ...]"
    # Use the caller-supplied name instead of a hard-coded 'crashlog' so the
    # help output matches the command the parser was created for.
    option_parser = optparse.OptionParser(
        description=description, prog=command_name, usage=usage)
    add_option = option_parser.add_option

    add_option(
        '--version', '-V',
        dest='version',
        action='store_true',
        help='Show crashlog version',
        default=False)
    add_option(
        '--verbose', '-v',
        action='store_true',
        dest='verbose',
        help='display verbose debug info',
        default=False)
    add_option(
        '--debug', '-g',
        action='store_true',
        dest='debug',
        help='display verbose debug logging',
        default=False)
    add_option(
        '--load-all', '-a',
        action='store_true',
        dest='load_all_images',
        help='load all executable images, not just the images found in the '
        'crashed stack frames, loads stackframes for all the threads in '
        'interactive mode.',
        default=False)
    add_option(
        '--images',
        action='store_true',
        dest='dump_image_list',
        help='show image list',
        default=False)
    add_option(
        '--debug-delay',
        type='int',
        dest='debug_delay',
        metavar='NSEC',
        help='pause for NSEC seconds for debugger',
        default=0)
    add_option(
        '--crashed-only', '-c',
        action='store_true',
        dest='crashed_only',
        help='only symbolicate the crashed thread',
        default=False)
    add_option(
        '--disasm-depth', '-d',
        type='int',
        dest='disassemble_depth',
        help='set the depth in stack frames that should be disassembled (default is 1)',
        default=1)
    add_option(
        '--disasm-all', '-D',
        action='store_true',
        dest='disassemble_all_threads',
        help='enabled disassembly of frames on all threads (not just the crashed thread)',
        default=False)
    add_option(
        '--disasm-before', '-B',
        type='int',
        dest='disassemble_before',
        help='the number of instructions to disassemble before the frame PC',
        default=4)
    add_option(
        '--disasm-after', '-A',
        type='int',
        dest='disassemble_after',
        help='the number of instructions to disassemble after the frame PC',
        default=4)
    add_option(
        '--source-context', '-C',
        type='int',
        metavar='NLINES',
        dest='source_context',
        help='show NLINES source lines of source context (default = 4)',
        default=4)
    add_option(
        '--source-frames',
        type='int',
        metavar='NFRAMES',
        dest='source_frames',
        help='show source for NFRAMES (default = 4)',
        default=4)
    add_option(
        '--source-all',
        action='store_true',
        dest='source_all',
        help='show source for all threads, not just the crashed thread',
        default=False)

    if add_interactive_options:
        # No explicit dest: optparse derives 'interactive' and 'batch' from
        # the long option names.
        add_option(
            '-i', '--interactive',
            action='store_true',
            help='parse a crash log and load it in a ScriptedProcess',
            default=False)
        add_option(
            '-b', '--batch',
            action='store_true',
            help='dump symbolicated stackframes without creating a debug session',
            default=True)
        add_option(
            '--target', '-t',
            dest='target_path',
            help='the target binary path that should be used for interactive crashlog (optional)',
            default=None)
        add_option(
            '--skip-status', '-s',
            dest='skip_status',
            action='store_true',
            help='prevent the interactive crashlog to dump the process status and thread backtrace at launch',
            default=False)
    return option_parser
1244
1245
def CrashLogOptionParser():
    """Return the option parser of the "crashlog" command.

    The parser is created with the interactive-mode options enabled and the
    command's long description as help text.
    """
    description_lines = (
        'Symbolicate one or more darwin crash log files to provide source file and line information,',
        'inlined stack frames back to the concrete functions, and disassemble the location of the crash',
        'for the first frame of the crashed thread.',
        'If this script is imported into the LLDB command interpreter, a "crashlog" command will be added to the interpreter',
        'for use at the LLDB command line. After a crash log has been parsed and symbolicated, a target will have been',
        'created that has all of the shared libraries loaded at the load addresses found in the crash log file. This allows',
        'you to explore the program as if it were stopped at the locations described in the crash log and functions can',
        'be disassembled and lookups can be performed using the addresses found in the crash log.',
    )
    return CreateSymbolicateCrashLogOptions(
        'crashlog', '\n'.join(description_lines), True)
1256
def SymbolicateCrashLogs(debugger, command_args, result):
    """Parse the command arguments and symbolicate every listed crash log.

    Args:
        debugger: debugger used to parse and load the crash logs.
        command_args: list of command-line style arguments (options followed
            by crash log paths). An empty list prints the help text.
        result: command return object; interactive-mode failures are
            reported through ``result.SetError``.
    """
    option_parser = CrashLogOptionParser()

    if not command_args:
        option_parser.print_help()
        return

    try:
        (options, args) = option_parser.parse_args(command_args)
    except SystemExit:
        # optparse reports invalid options (and handles --help) by calling
        # sys.exit(); that must not terminate the hosting lldb session.
        return

    if options.version:
        print(debugger.GetVersionString())
        return

    if options.debug:
        print('command_args = %s' % command_args)
        print('options', options)
        print('args', args)

    if options.debug_delay > 0:
        print("Waiting %u seconds for debugger to attach..." % options.debug_delay)
        time.sleep(options.debug_delay)

    def should_run_in_interactive_mode(options, ci):
        # NOTE: only an explicit --interactive enables interactive mode; the
        # command interpreter's own interactivity is not consulted.
        if options.interactive:
            return True
        elif options.batch:
            return False
        else:
            return False

    ci = debugger.GetCommandInterpreter()
    # The mode depends only on the options, so decide once for all files.
    interactive = should_run_in_interactive_mode(options, ci)

    for crash_log_file in args:
        if interactive:
            try:
                load_crashlog_in_scripted_process(debugger, crash_log_file,
                                                  options, result)
            except InteractiveCrashLogException as e:
                result.SetError(str(e))
        else:
            crash_log = CrashLogParser(debugger, crash_log_file, options.verbose).parse()
            SymbolicateCrashLog(crash_log, options)
1306
if __name__ == '__main__':
    # Create a new debugger instance
    debugger = lldb.SBDebugger.Create()
    try:
        result = lldb.SBCommandReturnObject()
        SymbolicateCrashLogs(debugger, sys.argv[1:], result)
    finally:
        # Release the debugger even if symbolication raises.
        lldb.SBDebugger.Destroy(debugger)
1313
def __lldb_init_module(debugger, internal_dict):
    """Register the "crashlog" and "save_crashlog" commands with lldb.

    Adds the class-based "crashlog" command and the function-based
    "save_crashlog" command, then prints a short confirmation.
    """
    registrations = (
        'command script add -c lldb.macosx.crashlog.Symbolicate crashlog',
        'command script add -f lldb.macosx.crashlog.save_crashlog save_crashlog',
    )
    for registration in registrations:
        debugger.HandleCommand(registration)
    confirmation = ('"crashlog" and "save_crashlog" commands have been installed, use '
                    'the "--help" options on these commands for detailed help.')
    print(confirmation)
1321