#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
"""
Example of use:
  asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log

PLUGINS

This script provides a way for external plug-ins to hook into the behaviour of
various parts of this script (see `--plugins`). This is useful for situations
where it is necessary to handle site-specific quirks (e.g. binaries with debug
symbols only accessible via a remote service) without having to modify the
script itself.

"""
import argparse
import bisect
import errno
import getopt
import logging
import os
import re
import subprocess
import sys
try:
  # distutils was removed from the standard library in Python 3.12;
  # shutil.which offers the same "path or None" lookup.
  from shutil import which as find_executable
except ImportError:
  # Python 2 has no shutil.which.
  from distutils.spawn import find_executable

symbolizers = {}
demangle = False
binutils_prefix = None
fix_filename_patterns = None
logfile = sys.stdin
allow_system_symbolizer = True
force_system_symbolizer = False

# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Normalize a "file:line" string produced by a symbolizer.

  Strips every prefix listed in the global `fix_filename_patterns`, collapses
  ASan runtime sources (asan_*.cc / asan_*.cpp) to `_asan_rtl_` and maps
  `crtstuff.c:0` to `???:0`.
  """
  if fix_filename_patterns:
    for path_to_cut in fix_filename_patterns:
      file_name = re.sub('.*' + path_to_cut, '', file_name)
  # The dot before the extension is escaped so it only matches a literal '.'.
  file_name = re.sub(r'.*asan_[a-z_]*\.(cc|cpp):[0-9]*', '_asan_rtl_', file_name)
  file_name = re.sub(r'.*crtstuff.c:0', '???:0', file_name)
  return file_name

def is_valid_arch(s):
  """Return True if `s` is one of the architecture names this script knows."""
  return s in ["i386", "x86_64", "x86_64h", "arm", "armv6", "armv7", "armv7s",
               "armv7k", "arm64", "powerpc64", "powerpc64le", "s390x", "s390"]

def guess_arch(addr):
  """Guess the architecture from the textual width of an address."""
  # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
  if len(addr) > 10:
    return 'x86_64'
  else:
    return 'i386'

class Symbolizer(object):
  """Base class of all symbolizers; `symbolize` returns None by default."""
  def __init__(self):
    pass

  def symbolize(self, addr, binary, offset):
    """Symbolize the given address (pair of binary and offset).

    Overridden in subclasses.
    Args:
        addr: virtual address of an instruction.
        binary: path to executable/shared object containing this instruction.
        offset: instruction offset in the @binary.
    Returns:
        list of strings (one string for each inlined frame) describing
        the code locations for this instruction (that is, function name, file
        name, line and column numbers).
    """
    return None


class LLVMSymbolizer(Symbolizer):
  """Symbolizer backed by a long-running llvm-symbolizer child process."""
  def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    self.default_arch = default_arch
    self.system = system
    # Avoid a shared mutable default; an explicitly passed container is kept.
    self.dsym_hints = [] if dsym_hints is None else dsym_hints
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Start llvm-symbolizer; return the Popen object or None on OSError."""
    cmd = [self.symbolizer_path,
           '--use-symbol-table=true',
           '--demangle=%s' % demangle,
           '--functions=linkage',
           '--inlining=true',
           '--default-arch=%s' % self.default_arch]
    if self.system == 'Darwin':
      for hint in self.dsym_hints:
        cmd.append('--dsym-hint=%s' % hint)
    logging.debug(' '.join(cmd))
    try:
      result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)
    except OSError:
      result = None
    return result

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if not self.pipe:
      return None
    result = []
    try:
      symbolizer_input = '"%s" %s' % (binary, offset)
      logging.debug(symbolizer_input)
      self.pipe.stdin.write("%s\n" % symbolizer_input)
      # Output is pairs of lines (function, file) ended by an empty line.
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = self.pipe.stdout.readline().rstrip()
        file_name = fix_filename(file_name)
        if (not function_name.startswith('??') or
            not file_name.startswith('??')):
          # Append only non-trivial frames.
          result.append('%s in %s %s' % (addr, function_name,
                                         file_name))
    except Exception as e:
      # Best effort: report "no result" so the caller can fall back to the
      # next symbolizer, but leave a trace of what went wrong.
      logging.debug('got exception communicating with llvm-symbolizer',
                    exc_info=e)
      result = []
    if not result:
      result = None
    return result


def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
  """Create an LLVMSymbolizer.

  The binary is taken from $LLVM_SYMBOLIZER_PATH, then $ASAN_SYMBOLIZER_PATH,
  and finally defaults to `llvm-symbolizer` looked up in PATH.
  """
  if dsym_hints is None:
    dsym_hints = []
  symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
  if not symbolizer_path:
    symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
    if not symbolizer_path:
      # Assume llvm-symbolizer is in PATH.
      symbolizer_path = 'llvm-symbolizer'
  return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)


class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer backed by an addr2line child process for a single binary."""
  def __init__(self, binary):
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()
    # Sentinel address written after each query to detect the end of output.
    self.output_terminator = -1

  def open_addr2line(self):
    """Start addr2line (optionally prefixed by `binutils_prefix`)."""
    addr2line_tool = 'addr2line'
    if binutils_prefix:
      addr2line_tool = binutils_prefix + addr2line_tool
    logging.debug('addr2line binary is %s' % find_executable(addr2line_tool))
    cmd = [addr2line_tool, '-fi']
    if demangle:
      cmd += ['--demangle']
    cmd += ['-e', self.binary]
    logging.debug(' '.join(cmd))
    return subprocess.Popen(cmd,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                            bufsize=0,
                            universal_newlines=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if self.binary != binary:
      return None
    lines = []
    try:
      self.pipe.stdin.write("%s\n" % offset)
      self.pipe.stdin.write("%s\n" % self.output_terminator)
      is_first_frame = True
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        logging.debug("read function_name='%s' from addr2line" % function_name)
        # If llvm-symbolizer is installed as addr2line, older versions of
        # llvm-symbolizer will print -1 when presented with -1 and not print
        # a second line. In that case we will block for ever trying to read the
        # file name. This also happens for non-existent files, in which case GNU
        # addr2line exits immediate, but llvm-symbolizer does not (see
        # https://llvm.org/PR42754).
        if function_name == '-1':
          logging.debug("got function '-1' -> no more input")
          break
        file_name = self.pipe.stdout.readline().rstrip()
        logging.debug("read file_name='%s' from addr2line" % file_name)
        if is_first_frame:
          is_first_frame = False
        elif function_name == '??':
          assert file_name == '??:0', file_name
          logging.debug("got function '??' -> no more input")
          break
        elif not function_name:
          assert not file_name, file_name
          logging.debug("got empty function name -> no more input")
          break
        if not function_name and not file_name:
          logging.debug("got empty function and file name -> unknown function")
          function_name = '??'
          file_name = '??:0'
        lines.append((function_name, file_name))
    except IOError as e:
      # EPIPE happens if addr2line exits early (which some implementations do
      # if an invalid file is passed).
      if e.errno == errno.EPIPE:
        # poll() returns None while the child has not been reaped yet, so the
        # value must not be formatted with %d.
        logging.debug("addr2line exited early (broken pipe), returncode=%s",
                      self.pipe.poll())
      else:
        logging.debug("unexpected I/O exception communicating with addr2line", exc_info=e)
      lines.append(('??', '??:0'))
    except Exception as e:
      logging.debug("got unknown exception communicating with addr2line", exc_info=e)
      lines.append(('??', '??:0'))
    return ['%s in %s %s' % (addr, function, fix_filename(file)) for (function, file) in lines]

class UnbufferedLineConverter(object):
  """
  Wrap a child process that responds to each line of input with one line of
  output.  Uses pty to trick the child into providing unbuffered output.
  """
  def __init__(self, args, close_stderr=False):
    # Local imports so that the script can start on Windows.
    import pty
    import termios
    pid, fd = pty.fork()
    if pid == 0:
      # We're the child. Transfer control to command.
      if close_stderr:
        dev_null = os.open('/dev/null', 0)
        os.dup2(dev_null, 2)
      os.execvp(args[0], args)
    else:
      # Disable echoing.
      attr = termios.tcgetattr(fd)
      attr[3] = attr[3] & ~termios.ECHO
      termios.tcsetattr(fd, termios.TCSANOW, attr)
      # Set up a file()-like interface to the child process
      self.r = os.fdopen(fd, "r", 1)
      self.w = os.fdopen(os.dup(fd), "w", 1)

  def convert(self, line):
    """Send one line to the child and return its one-line answer."""
    self.w.write(line + "\n")
    return self.readline()

  def readline(self):
    """Read one line from the child with trailing whitespace stripped."""
    return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
  """Symbolizer backed by an `atos` child process for a single binary."""
  def __init__(self, addr, binary, arch):
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    self.arch = arch
    self.open_atos()

  def open_atos(self):
    """Start atos for `self.binary` / `self.arch` behind a pty."""
    logging.debug('atos -o %s -arch %s', self.binary, self.arch)
    cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
    self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if self.binary != binary:
      return None
    if not os.path.exists(binary):
      # If the binary doesn't exist atos will exit which will lead to IOError
      # exceptions being raised later on so just don't try to symbolize.
      return ['{} ({}:{}+{})'.format(addr, binary, self.arch, offset)]
    atos_line = self.atos.convert('0x%x' % int(offset, 16))
    while "got symbolicator for" in atos_line:
      atos_line = self.atos.readline()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    logging.debug('atos_line: %s', atos_line)
    if match:
      function_name = match.group(1)
      # Drop the parenthesized argument list from the function name.
      function_name = re.sub(r'\(.*?\)', '', function_name)
      file_name = fix_filename(match.group(3))
      return ['%s in %s %s' % (addr, function_name, file_name)]
    else:
      return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries each symbolizer in order and returns the first truthy result."""
  def __init__(self, symbolizer_list):
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    for symbolizer in self.symbolizer_list:
      # Entries may be None (e.g. when a factory declined to create one).
      if symbolizer:
        result = symbolizer.symbolize(addr, binary, offset)
        if result:
          return result
    return None

  def append_symbolizer(self, symbolizer):
    """Add `symbolizer` to the end of the chain."""
    self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
  """Return a BreakpadSymbolizer for `binary` + $BREAKPAD_SUFFIX, or None."""
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if suffix:
    filename = binary + suffix
    if os.access(filename, os.F_OK):
      return BreakpadSymbolizer(filename)
  return None


def SystemSymbolizerFactory(system, addr, binary, arch):
  """Return the platform's native symbolizer, or None for other systems."""
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary, arch)
  elif system in ['Linux', 'FreeBSD', 'NetBSD', 'SunOS']:
    return Addr2LineSymbolizer(binary)
  return None


class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that answers queries from a Breakpad symbol file."""
  def __init__(self, filename):
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # open() works on both Python 2 and 3 (file() is Python 2 only) and the
    # context manager closes the handle.
    with open(filename) as symbol_file:
      lines = symbol_file.readlines()
    self.files = []
    self.symbols = {}
    self.address_list = []
    self.addresses = {}
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Populate the file, symbol and address tables from the record lines."""
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Tolerate blank lines instead of failing with IndexError.
        continue
      if fragments[0] == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif fragments[0] == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif fragments[0] in ['CFI', 'STACK']:
        pass
      elif fragments[0] == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(fragments[0], 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Return (symbol, filename, line) covering `addr`, or None."""
    key = None
    if addr in self.addresses:
      key = addr
    else:
      # Find the closest record that starts at or below `addr`.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      else:
        key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    else:
      return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if res:
      function_name, file_name, line_no = res
      result = ['%s in %s %s:%d' % (
          addr, function_name, file_name, line_no)]
      # Logged rather than printed so the symbolized report on stdout is not
      # polluted with a debug dump of the result list.
      logging.debug(result)
      return result
    else:
      return None


class SymbolizationLoop(object):
  """Reads a sanitizer report line by line and symbolizes its frame lines."""
  def __init__(self, plugin_proxy=None, dsym_hint_producer=None):
    self.plugin_proxy = plugin_proxy
    if sys.platform == 'win32':
      # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
      # even in sandboxed processes.  Nothing needs to be done here.
      self.process_line = self.process_line_echo
    else:
      # Used by clients who may want to supply a different binary name.
      # E.g. in Chrome several binaries may share a single .dSYM.
      self.dsym_hint_producer = dsym_hint_producer
      self.system = os.uname()[0]
      if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD','SunOS']:
        raise Exception('Unknown system')
      self.llvm_symbolizers = {}
      self.last_llvm_symbolizer = None
      self.dsym_hints = set([])
      self.frame_no = 0
      self.process_line = self.process_line_posix
      # `plugin_proxy` defaults to None, in which case no module map exists.
      self.using_module_map = (
          plugin_proxy is not None and
          plugin_proxy.has_plugin(ModuleMapPlugIn.get_name()))

  def symbolize_address(self, addr, binary, offset, arch):
    """Symbolize one address, falling back to the system symbolizer."""
    # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
    # a single symbolizer binary.
    # On Darwin, if the dsym hint producer is present:
    #  1. check whether we've seen this binary already; if so,
    #     use |llvm_symbolizers[binary]|, which has already loaded the debug
    #     info for this binary (might not be the case for
    #     |last_llvm_symbolizer|);
    #  2. otherwise check if we've seen all the hints for this binary already;
    #     if so, reuse |last_llvm_symbolizer| which has the full set of hints;
    #  3. otherwise create a new symbolizer and pass all currently known
    #     .dSYM hints to it.
    result = None
    if not force_system_symbolizer:
      if binary not in self.llvm_symbolizers:
        use_new_symbolizer = True
        if self.system == 'Darwin' and self.dsym_hint_producer:
          dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
          use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
          self.dsym_hints |= dsym_hints_for_binary
        if self.last_llvm_symbolizer and not use_new_symbolizer:
          self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        else:
          self.last_llvm_symbolizer = LLVMSymbolizerFactory(
              self.system, arch, self.dsym_hints)
          self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
      # Use the chain of symbolizers:
      # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
      # (fall back to next symbolizer if the previous one fails).
      if binary not in symbolizers:
        symbolizers[binary] = ChainSymbolizer(
            [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
      result = symbolizers[binary].symbolize(addr, binary, offset)
    else:
      symbolizers[binary] = ChainSymbolizer([])
    if result is None:
      if not allow_system_symbolizer:
        raise Exception('Failed to launch or use llvm-symbolizer.')
      # Initialize system symbolizer only if other symbolizers failed.
      symbolizers[binary].append_symbolizer(
          SystemSymbolizerFactory(self.system, addr, binary, arch))
      result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

  def get_symbolized_lines(self, symbolized_lines, inc_frame_counter=True):
    """Format symbolized frames, or echo the current line if there are none."""
    if not symbolized_lines:
      if inc_frame_counter:
        self.frame_no += 1
      return [self.current_line]
    else:
      assert inc_frame_counter
      result = []
      for symbolized_frame in symbolized_lines:
        result.append(' #%s %s' % (str(self.frame_no), symbolized_frame.rstrip()))
        self.frame_no += 1
      return result

  def process_logfile(self):
    """Process every line of the global `logfile` and print the result."""
    self.frame_no = 0
    for line in logfile:
      processed = self.process_line(line)
      print('\n'.join(processed))

  def process_line_echo(self, line):
    """Return the line unchanged apart from trailing whitespace."""
    return [line.rstrip()]

  def process_line_posix(self, line):
    """Symbolize `line` if it is a stack frame line, otherwise echo it."""
    self.current_line = line.rstrip()
    # Unsymbolicated:
    # #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    # Partially symbolicated:
    # #0 0x7f6e35cf2e45 in foo (foo.so+0x11fe45)
    # NOTE: We have to very liberal with symbol
    # names in the regex because it could be an
    # Objective-C or C++ demangled name.
    stack_trace_line_format = (
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)')
    match = re.match(stack_trace_line_format, line)
    if not match:
      logging.debug('Line "{}" does not match regex'.format(line))
      # Not a frame line so don't increment the frame counter.
      return self.get_symbolized_lines(None, inc_frame_counter=False)
    logging.debug(line)
    _, frameno_str, addr, binary, offset = match.groups()

    if not self.using_module_map and not os.path.isabs(binary):
      # Do not try to symbolicate if the binary is just the module file name
      # and a module map is unavailable.
      # FIXME(dliew): This is currently necessary for reports on Darwin that are
      # partially symbolicated by `atos`.
      return self.get_symbolized_lines(None)
    arch = ""
    # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h"
    colon_pos = binary.rfind(":")
    if colon_pos != -1:
      maybe_arch = binary[colon_pos+1:]
      if is_valid_arch(maybe_arch):
        arch = maybe_arch
        binary = binary[0:colon_pos]
    if arch == "":
      arch = guess_arch(addr)
    if frameno_str == '0':
      # Assume that frame #0 is the first frame of new stack trace.
      self.frame_no = 0
    original_binary = binary
    if self.plugin_proxy is not None:
      # Without a proxy (the constructor default) the path is used as-is.
      binary = self.plugin_proxy.filter_binary_path(binary)
    if binary is None:
      # The binary filter has told us this binary can't be symbolized.
      logging.debug('Skipping symbolication of binary "%s"', original_binary)
      return self.get_symbolized_lines(None)
    symbolized_line = self.symbolize_address(addr, binary, offset, arch)
    if not symbolized_line:
      if original_binary != binary:
        symbolized_line = self.symbolize_address(addr, original_binary, offset, arch)
    return self.get_symbolized_lines(symbolized_line)

class AsanSymbolizerPlugInProxy(object):
  """
  Serves several purposes:
  - Manages the lifetime of plugins (must be used a `with` statement).
  - Provides interface for calling into plugins from within this script.
  """
  def __init__(self):
    self._plugins = [ ]
    self._plugin_names = set()

  def _load_plugin_from_file_impl_py_gt_2(self, file_path, globals_space):
    # NOTE: this executes the plug-in file's code; plug-in paths come from the
    # `--plugins` command line option.
    with open(file_path, 'r') as f:
      exec(f.read(), globals_space, None)

  def load_plugin_from_file(self, file_path):
    """Execute a plug-in file, exposing `register_plugin` to it."""
    logging.info('Loading plugins from "{}"'.format(file_path))
    globals_space = dict(globals())
    # Provide function to register plugins
    def register_plugin(plugin):
      logging.info('Registering plugin %s', plugin.get_name())
      self.add_plugin(plugin)
    globals_space['register_plugin'] = register_plugin
    if sys.version_info.major < 3:
      execfile(file_path, globals_space, None)
    else:
      # Indirection here is to avoid a bug in older Python 2 versions:
      # `SyntaxError: unqualified exec is not allowed in function ...`
      self._load_plugin_from_file_impl_py_gt_2(file_path, globals_space)

  def add_plugin(self, plugin):
    """Start managing `plugin` and hand it a reference to this proxy."""
    assert isinstance(plugin, AsanSymbolizerPlugIn)
    self._plugins.append(plugin)
    self._plugin_names.add(plugin.get_name())
    plugin._receive_proxy(self)

  def remove_plugin(self, plugin):
    """Stop managing `plugin` and call its `destroy()` hook."""
    assert isinstance(plugin, AsanSymbolizerPlugIn)
    self._plugins.remove(plugin)
    self._plugin_names.remove(plugin.get_name())
    logging.debug('Removing plugin %s', plugin.get_name())
    plugin.destroy()

  def has_plugin(self, name):
    """
    Returns true iff the plugin name is currently
    being managed by AsanSymbolizerPlugInProxy.
    """
    return name in self._plugin_names

  def register_cmdline_args(self, parser):
    """Let every managed plug-in add its arguments to `parser`."""
    plugins = list(self._plugins)
    for plugin in plugins:
      plugin.register_cmdline_args(parser)

  def process_cmdline_args(self, pargs):
    """Pass parsed arguments to each plug-in; drop those that opt out."""
    # Use copy so we can remove items as we iterate.
    plugins = list(self._plugins)
    for plugin in plugins:
      keep = plugin.process_cmdline_args(pargs)
      assert isinstance(keep, bool)
      if not keep:
        self.remove_plugin(plugin)

  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    for plugin in self._plugins:
      plugin.destroy()
    # Don't suppress raised exceptions
    return False

  def _filter_single_value(self, function_name, input_value):
    """
    Helper for filter style plugin functions.
    """
    new_value = input_value
    for plugin in self._plugins:
      result = getattr(plugin, function_name)(new_value)
      # A None from any plug-in short-circuits the whole chain.
      if result is None:
        return None
      new_value = result
    return new_value

  def filter_binary_path(self, binary_path):
    """
    Consult available plugins to filter the path to a binary
    to make it suitable for symbolication.

    Returns `None` if symbolication should not be attempted for this
    binary.
    """
    return self._filter_single_value('filter_binary_path', binary_path)

  def filter_module_desc(self, module_desc):
    """
    Consult available plugins to determine the module
    description suitable for symbolication.

    Returns `None` if symbolication should not be attempted for this module.
    """
    assert isinstance(module_desc, ModuleDesc)
    return self._filter_single_value('filter_module_desc', module_desc)

class AsanSymbolizerPlugIn(object):
  """
  This is the interface the `asan_symbolize.py` code uses to talk
  to plugins.
  """
  @classmethod
  def get_name(cls):
    """
    Returns the name of the plugin.
    """
    return cls.__name__

  def _receive_proxy(self, proxy):
    assert isinstance(proxy, AsanSymbolizerPlugInProxy)
    self.proxy = proxy

  def register_cmdline_args(self, parser):
    """
    Hook for registering command line arguments to be
    consumed in `process_cmdline_args()`.

    `parser` - Instance of `argparse.ArgumentParser`.
    """
    pass

  def process_cmdline_args(self, pargs):
    """
    Hook for handling parsed arguments. Implementations
    should not modify `pargs`.

    `pargs` - Instance of `argparse.Namespace` containing
    parsed command line arguments.

    Return `True` if plug-in should be used, otherwise
    return `False`.
    """
    return True

  def destroy(self):
    """
    Hook called when a plugin is about to be destroyed.
    Implementations should free any allocated resources here.
    """
    pass

  # Symbolization hooks
  def filter_binary_path(self, binary_path):
    """
    Given a binary path return a binary path suitable for symbolication.

    Implementations should return `None` if symbolication of this binary
    should be skipped.
    """
    return binary_path

  def filter_module_desc(self, module_desc):
    """
    Given a ModuleDesc object (`module_desc`) return
    a ModuleDesc suitable for symbolication.

    Implementations should return `None` if symbolication of this binary
    should be skipped.
    """
    return module_desc

class ModuleDesc(object):
  """Description of one loaded module; validated on construction."""
  def __init__(self, name, arch, start_addr, end_addr, module_path, uuid):
    self.name = name
    self.arch = arch
    self.start_addr = start_addr
    self.end_addr = end_addr
    # Module path from an ASan report.
    self.module_path = module_path
    # Module for performing symbolization, by default same as above.
    self.module_path_for_symbolization = module_path
    self.uuid = uuid
    assert self.is_valid()

  def __str__(self):
    assert self.is_valid()
    return "{name} {arch} {start_addr:#016x}-{end_addr:#016x} {module_path} {uuid}".format(
      name=self.name,
      arch=self.arch,
      start_addr=self.start_addr,
      end_addr=self.end_addr,
      module_path=self.module_path if self.module_path == self.module_path_for_symbolization else '{} ({})'.format(self.module_path_for_symbolization, self.module_path),
      uuid=self.uuid
    )

  def is_valid(self):
    """Check field types, the address range and that both paths are absolute."""
    if not isinstance(self.name, str):
      return False
    if not isinstance(self.arch, str):
      return False
    if not isinstance(self.start_addr, int):
      return False
    if self.start_addr < 0:
      return False
    if not isinstance(self.end_addr, int):
      return False
    if self.end_addr <= self.start_addr:
      return False
    if not isinstance(self.module_path, str):
      return False
    if not os.path.isabs(self.module_path):
      return False
    if not isinstance(self.module_path_for_symbolization, str):
      return False
    if not os.path.isabs(self.module_path_for_symbolization):
      return False
    if not isinstance(self.uuid, str):
      return False
    return True

class GetUUIDFromBinaryException(Exception):
  """Raised when the UUID of a binary cannot be determined."""
  def __init__(self, msg):
    super(GetUUIDFromBinaryException, self).__init__(msg)

_get_uuid_from_binary_cache = dict()

def get_uuid_from_binary(path_to_binary, arch=None):
  """Return the LC_UUID of a binary as reported by `/usr/bin/otool -l`.

  Results are cached per (path, arch) pair.

  Raises:
      GetUUIDFromBinaryException: the binary does not exist, the otool output
          is malformed, or no LC_UUID load command was found.
  """
  cache_key = (path_to_binary, arch)
  cached_value = _get_uuid_from_binary_cache.get(cache_key)
  if cached_value:
    return cached_value
  if not os.path.exists(path_to_binary):
    raise GetUUIDFromBinaryException('Binary "{}" does not exist'.format(path_to_binary))
  cmd = [ '/usr/bin/otool', '-l']
  if arch:
    cmd.extend(['-arch', arch])
  cmd.append(path_to_binary)
  output = subprocess.check_output(cmd, stderr=subprocess.STDOUT)
  # Look for this output:
  # cmd LC_UUID
  # cmdsize 24
  # uuid 4CA778FE-5BF9-3C45-AE59-7DF01B2BE83F
  if isinstance(output, str):
    output_str = output
  else:
    assert isinstance(output, bytes)
    output_str = output.decode()
  assert isinstance(output_str, str)
  lines = output_str.split('\n')
  uuid = None
  for index, line in enumerate(lines):
    stripped_line = line.strip()
    if not stripped_line.startswith('cmd LC_UUID'):
      continue
    if index + 2 >= len(lines):
      # Truncated output: report it through the documented exception type
      # instead of letting an IndexError escape.
      raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(stripped_line))
    uuid_line = lines[index+2].strip()
    if not uuid_line.startswith('uuid'):
      raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line))
    split_uuid_line = uuid_line.split()
    uuid = split_uuid_line[1]
    break
  if uuid is None:
    logging.error('Failed to retrieve UUID from binary {}'.format(path_to_binary))
    logging.error('otool output was:\n{}'.format(output_str))
    raise GetUUIDFromBinaryException('Failed to retrieve UUID from binary "{}"'.format(path_to_binary))
  else:
    # Update cache
    _get_uuid_from_binary_cache[cache_key] = uuid
  return uuid

class ModuleMap(object):
  """Maps module names to their ModuleDesc."""
  def __init__(self):
    self._module_name_to_description_map = dict()

  def add_module(self, desc):
    """Register `desc`; its name must not have been added before."""
    assert isinstance(desc, ModuleDesc)
    assert desc.name not in self._module_name_to_description_map
    self._module_name_to_description_map[desc.name] = desc

  def find_module_by_name(self, name):
    """Return the ModuleDesc registered under `name`, or None."""
    return self._module_name_to_description_map.get(name, None)

  def __str__(self):
    s = '{} modules:\n'.format(self.num_modules)
    for module_desc in sorted(self._module_name_to_description_map.values(), key=lambda v: v.start_addr):
      s += str(module_desc) + '\n'
    return s

  @property
  def num_modules(self):
    return len(self._module_name_to_description_map)

  @property
  def modules(self):
    return set(self._module_name_to_description_map.values())

  def get_module_path_for_symbolication(self, module_name, proxy, validate_uuid):
    """Return the path to symbolize `module_name` with, or None to skip it.

    None is returned when the module is unknown, when a plug-in filters it
    out, or when `validate_uuid` is set and the UUID cannot be read or does
    not match the module description.
    """
    module_desc = self.find_module_by_name(module_name)
    if module_desc is None:
      return None
    # Allow a plug-in to change the module description to make it
    # suitable for symbolication or avoid symbolication altogether.
    module_desc = proxy.filter_module_desc(module_desc)
    if module_desc is None:
      return None
    if validate_uuid:
      logging.debug('Validating UUID of {}'.format(module_desc.module_path_for_symbolization))
      try:
        uuid = get_uuid_from_binary(module_desc.module_path_for_symbolization, arch = module_desc.arch)
        if uuid != module_desc.uuid:
          logging.warning("Detected UUID mismatch {} != {}".format(uuid, module_desc.uuid))
          # UUIDs don't match. Tell client to not symbolize this.
          return None
      except GetUUIDFromBinaryException as e:
        logging.error('Failed to get binary from UUID: %s', str(e))
        return None
    else:
      logging.warning('Skipping validation of UUID of {}'.format(module_desc.module_path_for_symbolization))
    return module_desc.module_path_for_symbolization

  @staticmethod
  def parse_from_file(module_map_path):
    """Parse a module map from a text file.

    Lines before `Process module map:` are ignored; parsing stops at
    `End of module map`. Returns the ModuleMap, or None if the start marker
    was never seen. Raises Exception if the file does not exist or a line
    inside the map does not match the expected format.
    """
    if not os.path.exists(module_map_path):
      raise Exception('module map "{}" does not exist'.format(module_map_path))
    with open(module_map_path, 'r') as f:
      mm = None
      # E.g.
      # 0x2db4000-0x102ddc000 /path/to (arm64) <0D6BBDE0-FF90-3680-899D-8E6F9528E04C>
      hex_regex = lambda name: r'0x(?P<' + name + r'>[0-9a-f]+)'
      module_path_regex = r'(?P<path>.+)'
      arch_regex = r'\((?P<arch>.+)\)'
      uuid_regex = r'<(?P<uuid>[0-9A-Z-]+)>'
      line_regex = r'^{}-{}\s+{}\s+{}\s+{}'.format(
        hex_regex('start_addr'),
        hex_regex('end_addr'),
        module_path_regex,
        arch_regex,
        uuid_regex
      )
      matcher = re.compile(line_regex)
      line_num = 0
      line = 'dummy'
      # readline() returns '' only at end of file.
      while line != '':
        line = f.readline()
        line_num += 1
        if mm is None:
          if line.startswith('Process module map:'):
            mm = ModuleMap()
          continue
        if line.startswith('End of module map'):
          break
        m_obj = matcher.match(line)
        if not m_obj:
          raise Exception('Failed to parse line {} "{}"'.format(line_num, line))
        arch = m_obj.group('arch')
        start_addr = int(m_obj.group('start_addr'), base=16)
        end_addr = int(m_obj.group('end_addr'), base=16)
        module_path = m_obj.group('path')
        uuid = m_obj.group('uuid')
        module_desc = ModuleDesc(
          name=os.path.basename(module_path),
          arch=arch,
          start_addr=start_addr,
          end_addr=end_addr,
          module_path=module_path,
          uuid=uuid
        )
        mm.add_module(module_desc)
      if mm is not None:
        logging.debug('Loaded Module map from "{}":\n{}'.format(
          f.name,
          str(mm))
        )
      return mm

class SysRootFilterPlugIn(AsanSymbolizerPlugIn):
  """
  Simple plug-in to add sys root prefix to all binary paths
  used for symbolication.
  """
  def __init__(self):
    self.sysroot_path = ""

  def register_cmdline_args(self, parser):
    parser.add_argument('-s', dest='sys_root', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')

  def process_cmdline_args(self, pargs):
    if pargs.sys_root is None:
      # Not being used so remove ourselves.
      return False
    self.sysroot_path = pargs.sys_root
    return True

  def filter_binary_path(self, path):
    return self.sysroot_path + path

class ModuleMapPlugIn(AsanSymbolizerPlugIn):
  """Plug-in that resolves binary paths through a parsed module map."""
  def __init__(self):
    self._module_map = None
    self._uuid_validation = True

  def register_cmdline_args(self, parser):
    parser.add_argument('--module-map',
                        help='Path to text file containing module map '
                        'output. See print_module_map ASan option.')
    parser.add_argument('--skip-uuid-validation',
                        default=False,
                        action='store_true',
                        help='Skips validating UUID of modules using otool.')

  def process_cmdline_args(self, pargs):
    if not pargs.module_map:
      return False
    # Read the path from the `pargs` parameter, not from a global `args`.
    self._module_map = ModuleMap.parse_from_file(pargs.module_map)
    if self._module_map is None:
      msg = 'Failed to find module map'
      logging.error(msg)
      raise Exception(msg)
    self._uuid_validation = not pargs.skip_uuid_validation
    return True

  def filter_binary_path(self, binary_path):
    if os.path.isabs(binary_path):
      # This is a binary path so transform into
      # a module name
      module_name = os.path.basename(binary_path)
    else:
      module_name = binary_path
    return self._module_map.get_module_path_for_symbolication(
      module_name,
      self.proxy,
      self._uuid_validation
    )

def add_logging_args(parser):
  """Add the `--log-dest` and `--log-level` options to `parser`."""
  parser.add_argument('--log-dest',
    default=None,
    help='Destination path for script logging (default stderr).',
  )
  parser.add_argument('--log-level',
    choices=['debug', 'info', 'warning', 'error', 'critical'],
    default='info',
    help='Log level for script (default: %(default)s).'
  )

def setup_logging():
  """Configure logging from the command line; return the unparsed arguments."""
  # Set up a parser just for parsing the logging arguments.
  # This is necessary because logging should be configured before we
  # perform the main argument parsing.
  parser = argparse.ArgumentParser(add_help=False)
  add_logging_args(parser)
  pargs, unparsed_args = parser.parse_known_args()

  log_level = getattr(logging, pargs.log_level.upper())
  if log_level == logging.DEBUG:
    log_format = '%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] %(message)s'
  else:
    log_format = '%(levelname)s: %(message)s'
  basic_config = {
    'level': log_level,
    'format': log_format
  }
  log_dest = pargs.log_dest
  if log_dest:
    basic_config['filename'] = log_dest
  logging.basicConfig(**basic_config)
  logging.debug('Logging level set to "{}" and directing output to "{}"'.format(
    pargs.log_level,
    'stderr' if log_dest is None else log_dest)
  )
  return unparsed_args

def add_load_plugin_args(parser):
  """Add the `-p` / `--plugins` option to `parser`."""
  parser.add_argument('-p', '--plugins',
    help='Load plug-in', nargs='+', default=[])

def setup_plugins(plugin_proxy, args):
  parser = argparse.ArgumentParser(add_help=False)
  add_load_plugin_args(parser)
  pargs , unparsed_args = parser.parse_known_args()
  for plugin_path in pargs.plugins:
    plugin_proxy.load_plugin_from_file(plugin_path)
  # Add built-in plugins.
1018 plugin_proxy.add_plugin(ModuleMapPlugIn()) 1019 plugin_proxy.add_plugin(SysRootFilterPlugIn()) 1020 return unparsed_args 1021 1022if __name__ == '__main__': 1023 remaining_args = setup_logging() 1024 with AsanSymbolizerPlugInProxy() as plugin_proxy: 1025 remaining_args = setup_plugins(plugin_proxy, remaining_args) 1026 parser = argparse.ArgumentParser( 1027 formatter_class=argparse.RawDescriptionHelpFormatter, 1028 description='ASan symbolization script', 1029 epilog=__doc__) 1030 parser.add_argument('path_to_cut', nargs='*', 1031 help='pattern to be cut from the result file path ') 1032 parser.add_argument('-d','--demangle', action='store_true', 1033 help='demangle function names') 1034 parser.add_argument('-c', metavar='CROSS_COMPILE', 1035 help='set prefix for binutils') 1036 parser.add_argument('-l','--logfile', default=sys.stdin, 1037 type=argparse.FileType('r'), 1038 help='set log file name to parse, default is stdin') 1039 parser.add_argument('--force-system-symbolizer', action='store_true', 1040 help='don\'t use llvm-symbolizer') 1041 # Add logging arguments so that `--help` shows them. 1042 add_logging_args(parser) 1043 # Add load plugin arguments so that `--help` shows them. 1044 add_load_plugin_args(parser) 1045 plugin_proxy.register_cmdline_args(parser) 1046 args = parser.parse_args(remaining_args) 1047 plugin_proxy.process_cmdline_args(args) 1048 if args.path_to_cut: 1049 fix_filename_patterns = args.path_to_cut 1050 if args.demangle: 1051 demangle = True 1052 if args.c: 1053 binutils_prefix = args.c 1054 if args.logfile: 1055 logfile = args.logfile 1056 else: 1057 logfile = sys.stdin 1058 if args.force_system_symbolizer: 1059 force_system_symbolizer = True 1060 if force_system_symbolizer: 1061 assert(allow_system_symbolizer) 1062 loop = SymbolizationLoop(plugin_proxy) 1063 loop.process_logfile() 1064