1#!/usr/bin/env python 2# ===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# 3# 4# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5# See https://llvm.org/LICENSE.txt for license information. 6# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7# 8# ===------------------------------------------------------------------------===# 9""" 10Example of use: 11 asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log 12 13PLUGINS 14 15This script provides a way for external plug-ins to hook into the behaviour of 16various parts of this script (see `--plugins`). This is useful for situations 17where it is necessary to handle site-specific quirks (e.g. binaries with debug 18symbols only accessible via a remote service) without having to modify the 19script itself. 20 21""" 22import argparse 23import bisect 24import errno 25import getopt 26import logging 27import os 28import re 29import shutil 30import subprocess 31import sys 32 33symbolizers = {} 34demangle = False 35binutils_prefix = None 36fix_filename_patterns = None 37logfile = sys.stdin 38allow_system_symbolizer = True 39force_system_symbolizer = False 40 41# FIXME: merge the code that calls fix_filename(). 42def fix_filename(file_name): 43 if fix_filename_patterns: 44 for path_to_cut in fix_filename_patterns: 45 file_name = re.sub(".*" + path_to_cut, "", file_name) 46 file_name = re.sub(".*asan_[a-z_]*.(cc|cpp):[0-9]*", "_asan_rtl_", file_name) 47 file_name = re.sub(".*crtstuff.c:0", "???:0", file_name) 48 return file_name 49 50 51def is_valid_arch(s): 52 return s in [ 53 "i386", 54 "x86_64", 55 "x86_64h", 56 "arm", 57 "armv6", 58 "armv7", 59 "armv7s", 60 "armv7k", 61 "arm64", 62 "powerpc64", 63 "powerpc64le", 64 "s390x", 65 "s390", 66 "riscv64", 67 "loongarch64", 68 ] 69 70 71def guess_arch(addr): 72 # Guess which arch we're running. 10 = len('0x') + 8 hex digits. 73 if len(addr) > 10: 74 return "x86_64" 75 else: 76 return "i386" 77 78 79class Symbolizer(object): 80 def __init__(self): 81 pass 82 83 def symbolize(self, addr, binary, offset): 84 """Symbolize the given address (pair of binary and offset). 85 86 Overriden in subclasses. 87 Args: 88 addr: virtual address of an instruction. 89 binary: path to executable/shared object containing this instruction. 90 offset: instruction offset in the @binary. 91 Returns: 92 list of strings (one string for each inlined frame) describing 93 the code locations for this instruction (that is, function name, file 94 name, line and column numbers). 95 """ 96 return None 97 98 99class LLVMSymbolizer(Symbolizer): 100 def __init__(self, symbolizer_path, default_arch, system, dsym_hints=[]): 101 super(LLVMSymbolizer, self).__init__() 102 self.symbolizer_path = symbolizer_path 103 self.default_arch = default_arch 104 self.system = system 105 self.dsym_hints = dsym_hints 106 self.pipe = self.open_llvm_symbolizer() 107 108 def open_llvm_symbolizer(self): 109 cmd = [ 110 self.symbolizer_path, 111 ("--demangle" if demangle else "--no-demangle"), 112 "--functions=linkage", 113 "--inlines", 114 "--default-arch=%s" % self.default_arch, 115 ] 116 if self.system == "Darwin": 117 for hint in self.dsym_hints: 118 cmd.append("--dsym-hint=%s" % hint) 119 logging.debug(" ".join(cmd)) 120 try: 121 result = subprocess.Popen( 122 cmd, 123 stdin=subprocess.PIPE, 124 stdout=subprocess.PIPE, 125 bufsize=0, 126 universal_newlines=True, 127 ) 128 except OSError: 129 result = None 130 return result 131 132 def symbolize(self, addr, binary, offset): 133 """Overrides Symbolizer.symbolize.""" 134 if not self.pipe: 135 return None 136 result = [] 137 try: 138 symbolizer_input = '"%s" %s' % (binary, offset) 139 logging.debug(symbolizer_input) 140 self.pipe.stdin.write("%s\n" % symbolizer_input) 141 while True: 142 function_name = self.pipe.stdout.readline().rstrip() 143 if not function_name: 144 break 145 file_name = self.pipe.stdout.readline().rstrip() 146 file_name = fix_filename(file_name) 147 if not function_name.startswith("??") or not file_name.startswith("??"): 148 # Append only non-trivial frames. 149 result.append("%s in %s %s" % (addr, function_name, file_name)) 150 except Exception: 151 result = [] 152 if not result: 153 result = None 154 return result 155 156 157def LLVMSymbolizerFactory(system, default_arch, dsym_hints=[]): 158 symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH") 159 if not symbolizer_path: 160 symbolizer_path = os.getenv("ASAN_SYMBOLIZER_PATH") 161 if not symbolizer_path: 162 # Assume llvm-symbolizer is in PATH. 163 symbolizer_path = "llvm-symbolizer" 164 return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints) 165 166 167class Addr2LineSymbolizer(Symbolizer): 168 def __init__(self, binary): 169 super(Addr2LineSymbolizer, self).__init__() 170 self.binary = binary 171 self.pipe = self.open_addr2line() 172 self.output_terminator = -1 173 174 def open_addr2line(self): 175 addr2line_tool = "addr2line" 176 if binutils_prefix: 177 addr2line_tool = binutils_prefix + addr2line_tool 178 logging.debug("addr2line binary is %s" % shutil.which(addr2line_tool)) 179 cmd = [addr2line_tool, "-fi"] 180 if demangle: 181 cmd += ["--demangle"] 182 cmd += ["-e", self.binary] 183 logging.debug(" ".join(cmd)) 184 return subprocess.Popen( 185 cmd, 186 stdin=subprocess.PIPE, 187 stdout=subprocess.PIPE, 188 bufsize=0, 189 universal_newlines=True, 190 ) 191 192 def symbolize(self, addr, binary, offset): 193 """Overrides Symbolizer.symbolize.""" 194 if self.binary != binary: 195 return None 196 lines = [] 197 try: 198 self.pipe.stdin.write("%s\n" % offset) 199 self.pipe.stdin.write("%s\n" % self.output_terminator) 200 is_first_frame = True 201 while True: 202 function_name = self.pipe.stdout.readline().rstrip() 203 logging.debug("read function_name='%s' from addr2line" % function_name) 204 # If llvm-symbolizer is installed as addr2line, older versions of 205 # llvm-symbolizer will print -1 when presented with -1 and not print 206 # a second line. In that case we will block for ever trying to read the 207 # file name. This also happens for non-existent files, in which case GNU 208 # addr2line exits immediate, but llvm-symbolizer does not (see 209 # https://llvm.org/PR42754). 210 if function_name == "-1": 211 logging.debug("got function '-1' -> no more input") 212 break 213 file_name = self.pipe.stdout.readline().rstrip() 214 logging.debug("read file_name='%s' from addr2line" % file_name) 215 if is_first_frame: 216 is_first_frame = False 217 elif function_name == "??": 218 assert file_name == "??:0", file_name 219 logging.debug("got function '??' -> no more input") 220 break 221 elif not function_name: 222 assert not file_name, file_name 223 logging.debug("got empty function name -> no more input") 224 break 225 if not function_name and not file_name: 226 logging.debug( 227 "got empty function and file name -> unknown function" 228 ) 229 function_name = "??" 230 file_name = "??:0" 231 lines.append((function_name, file_name)) 232 except IOError as e: 233 # EPIPE happens if addr2line exits early (which some implementations do 234 # if an invalid file is passed). 235 if e.errno == errno.EPIPE: 236 logging.debug( 237 f"addr2line exited early (broken pipe) returncode={self.pipe.poll()}" 238 ) 239 else: 240 logging.debug( 241 "unexpected I/O exception communicating with addr2line", exc_info=e 242 ) 243 lines.append(("??", "??:0")) 244 except Exception as e: 245 logging.debug( 246 "got unknown exception communicating with addr2line", exc_info=e 247 ) 248 lines.append(("??", "??:0")) 249 return [ 250 "%s in %s %s" % (addr, function, fix_filename(file)) 251 for (function, file) in lines 252 ] 253 254 255class UnbufferedLineConverter(object): 256 """ 257 Wrap a child process that responds to each line of input with one line of 258 output. Uses pty to trick the child into providing unbuffered output. 259 """ 260 261 def __init__(self, args, close_stderr=False): 262 # Local imports so that the script can start on Windows. 263 import pty 264 import termios 265 266 pid, fd = pty.fork() 267 if pid == 0: 268 # We're the child. Transfer control to command. 269 if close_stderr: 270 dev_null = os.open("/dev/null", 0) 271 os.dup2(dev_null, 2) 272 os.execvp(args[0], args) 273 else: 274 # Disable echoing. 275 attr = termios.tcgetattr(fd) 276 attr[3] = attr[3] & ~termios.ECHO 277 termios.tcsetattr(fd, termios.TCSANOW, attr) 278 # Set up a file()-like interface to the child process 279 self.r = os.fdopen(fd, "r", 1) 280 self.w = os.fdopen(os.dup(fd), "w", 1) 281 282 def convert(self, line): 283 self.w.write(line + "\n") 284 return self.readline() 285 286 def readline(self): 287 return self.r.readline().rstrip() 288 289 290class DarwinSymbolizer(Symbolizer): 291 def __init__(self, addr, binary, arch): 292 super(DarwinSymbolizer, self).__init__() 293 self.binary = binary 294 self.arch = arch 295 self.open_atos() 296 297 def open_atos(self): 298 logging.debug("atos -o %s -arch %s", self.binary, self.arch) 299 cmdline = ["atos", "-o", self.binary, "-arch", self.arch] 300 self.atos = UnbufferedLineConverter(cmdline, close_stderr=True) 301 302 def symbolize(self, addr, binary, offset): 303 """Overrides Symbolizer.symbolize.""" 304 if self.binary != binary: 305 return None 306 if not os.path.exists(binary): 307 # If the binary doesn't exist atos will exit which will lead to IOError 308 # exceptions being raised later on so just don't try to symbolize. 309 return ["{} ({}:{}+{})".format(addr, binary, self.arch, offset)] 310 atos_line = self.atos.convert("0x%x" % int(offset, 16)) 311 while "got symbolicator for" in atos_line: 312 atos_line = self.atos.readline() 313 # A well-formed atos response looks like this: 314 # foo(type1, type2) (in object.name) (filename.cc:80) 315 # NOTE: 316 # * For C functions atos omits parentheses and argument types. 317 # * For C++ functions the function name (i.e., `foo` above) may contain 318 # templates which may contain parentheses. 319 match = re.match(r"^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line) 320 logging.debug("atos_line: %s", atos_line) 321 if match: 322 function_name = match.group(1) 323 file_name = fix_filename(match.group(3)) 324 return ["%s in %s %s" % (addr, function_name, file_name)] 325 else: 326 return ["%s in %s" % (addr, atos_line)] 327 328 329# Chain several symbolizers so that if one symbolizer fails, we fall back 330# to the next symbolizer in chain. 331class ChainSymbolizer(Symbolizer): 332 def __init__(self, symbolizer_list): 333 super(ChainSymbolizer, self).__init__() 334 self.symbolizer_list = symbolizer_list 335 336 def symbolize(self, addr, binary, offset): 337 """Overrides Symbolizer.symbolize.""" 338 for symbolizer in self.symbolizer_list: 339 if symbolizer: 340 result = symbolizer.symbolize(addr, binary, offset) 341 if result: 342 return result 343 return None 344 345 def append_symbolizer(self, symbolizer): 346 self.symbolizer_list.append(symbolizer) 347 348 349def BreakpadSymbolizerFactory(binary): 350 suffix = os.getenv("BREAKPAD_SUFFIX") 351 if suffix: 352 filename = binary + suffix 353 if os.access(filename, os.F_OK): 354 return BreakpadSymbolizer(filename) 355 return None 356 357 358def SystemSymbolizerFactory(system, addr, binary, arch): 359 if system == "Darwin": 360 return DarwinSymbolizer(addr, binary, arch) 361 elif system in ["Linux", "FreeBSD", "NetBSD", "SunOS"]: 362 return Addr2LineSymbolizer(binary) 363 364 365class BreakpadSymbolizer(Symbolizer): 366 def __init__(self, filename): 367 super(BreakpadSymbolizer, self).__init__() 368 self.filename = filename 369 lines = file(filename).readlines() 370 self.files = [] 371 self.symbols = {} 372 self.address_list = [] 373 self.addresses = {} 374 # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t 375 fragments = lines[0].rstrip().split() 376 self.arch = fragments[2] 377 self.debug_id = fragments[3] 378 self.binary = " ".join(fragments[4:]) 379 self.parse_lines(lines[1:]) 380 381 def parse_lines(self, lines): 382 cur_function_addr = "" 383 for line in lines: 384 fragments = line.split() 385 if fragments[0] == "FILE": 386 assert int(fragments[1]) == len(self.files) 387 self.files.append(" ".join(fragments[2:])) 388 elif fragments[0] == "PUBLIC": 389 self.symbols[int(fragments[1], 16)] = " ".join(fragments[3:]) 390 elif fragments[0] in ["CFI", "STACK"]: 391 pass 392 elif fragments[0] == "FUNC": 393 cur_function_addr = int(fragments[1], 16) 394 if not cur_function_addr in self.symbols.keys(): 395 self.symbols[cur_function_addr] = " ".join(fragments[4:]) 396 else: 397 # Line starting with an address. 398 addr = int(fragments[0], 16) 399 self.address_list.append(addr) 400 # Tuple of symbol address, size, line, file number. 401 self.addresses[addr] = ( 402 cur_function_addr, 403 int(fragments[1], 16), 404 int(fragments[2]), 405 int(fragments[3]), 406 ) 407 self.address_list.sort() 408 409 def get_sym_file_line(self, addr): 410 key = None 411 if addr in self.addresses.keys(): 412 key = addr 413 else: 414 index = bisect.bisect_left(self.address_list, addr) 415 if index == 0: 416 return None 417 else: 418 key = self.address_list[index - 1] 419 sym_id, size, line_no, file_no = self.addresses[key] 420 symbol = self.symbols[sym_id] 421 filename = self.files[file_no] 422 if addr < key + size: 423 return symbol, filename, line_no 424 else: 425 return None 426 427 def symbolize(self, addr, binary, offset): 428 if self.binary != binary: 429 return None 430 res = self.get_sym_file_line(int(offset, 16)) 431 if res: 432 function_name, file_name, line_no = res 433 result = ["%s in %s %s:%d" % (addr, function_name, file_name, line_no)] 434 print(result) 435 return result 436 else: 437 return None 438 439 440class SymbolizationLoop(object): 441 def __init__(self, plugin_proxy=None, dsym_hint_producer=None): 442 self.plugin_proxy = plugin_proxy 443 if sys.platform == "win32": 444 # ASan on Windows uses dbghelp.dll to symbolize in-process, which works 445 # even in sandboxed processes. Nothing needs to be done here. 446 self.process_line = self.process_line_echo 447 else: 448 # Used by clients who may want to supply a different binary name. 449 # E.g. in Chrome several binaries may share a single .dSYM. 450 self.dsym_hint_producer = dsym_hint_producer 451 self.system = os.uname()[0] 452 if self.system not in ["Linux", "Darwin", "FreeBSD", "NetBSD", "SunOS"]: 453 raise Exception("Unknown system") 454 self.llvm_symbolizers = {} 455 self.last_llvm_symbolizer = None 456 self.dsym_hints = set([]) 457 self.frame_no = 0 458 self.process_line = self.process_line_posix 459 self.using_module_map = plugin_proxy.has_plugin(ModuleMapPlugIn.get_name()) 460 461 def symbolize_address(self, addr, binary, offset, arch): 462 # On non-Darwin (i.e. on platforms without .dSYM debug info) always use 463 # a single symbolizer binary. 464 # On Darwin, if the dsym hint producer is present: 465 # 1. check whether we've seen this binary already; if so, 466 # use |llvm_symbolizers[binary]|, which has already loaded the debug 467 # info for this binary (might not be the case for 468 # |last_llvm_symbolizer|); 469 # 2. otherwise check if we've seen all the hints for this binary already; 470 # if so, reuse |last_llvm_symbolizer| which has the full set of hints; 471 # 3. otherwise create a new symbolizer and pass all currently known 472 # .dSYM hints to it. 473 result = None 474 if not force_system_symbolizer: 475 if not binary in self.llvm_symbolizers: 476 use_new_symbolizer = True 477 if self.system == "Darwin" and self.dsym_hint_producer: 478 dsym_hints_for_binary = set(self.dsym_hint_producer(binary)) 479 use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints) 480 self.dsym_hints |= dsym_hints_for_binary 481 if self.last_llvm_symbolizer and not use_new_symbolizer: 482 self.llvm_symbolizers[binary] = self.last_llvm_symbolizer 483 else: 484 self.last_llvm_symbolizer = LLVMSymbolizerFactory( 485 self.system, arch, self.dsym_hints 486 ) 487 self.llvm_symbolizers[binary] = self.last_llvm_symbolizer 488 # Use the chain of symbolizers: 489 # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos 490 # (fall back to next symbolizer if the previous one fails). 491 if not binary in symbolizers: 492 symbolizers[binary] = ChainSymbolizer( 493 [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]] 494 ) 495 result = symbolizers[binary].symbolize(addr, binary, offset) 496 else: 497 symbolizers[binary] = ChainSymbolizer([]) 498 if result is None: 499 if not allow_system_symbolizer: 500 raise Exception("Failed to launch or use llvm-symbolizer.") 501 # Initialize system symbolizer only if other symbolizers failed. 502 symbolizers[binary].append_symbolizer( 503 SystemSymbolizerFactory(self.system, addr, binary, arch) 504 ) 505 result = symbolizers[binary].symbolize(addr, binary, offset) 506 # The system symbolizer must produce some result. 507 assert result 508 return result 509 510 def get_symbolized_lines(self, symbolized_lines, inc_frame_counter=True): 511 if not symbolized_lines: 512 if inc_frame_counter: 513 self.frame_no += 1 514 return [self.current_line] 515 else: 516 assert inc_frame_counter 517 result = [] 518 for symbolized_frame in symbolized_lines: 519 result.append( 520 " #%s %s" % (str(self.frame_no), symbolized_frame.rstrip()) 521 ) 522 self.frame_no += 1 523 return result 524 525 def process_logfile(self): 526 self.frame_no = 0 527 for line in logfile: 528 processed = self.process_line(line) 529 print("\n".join(processed)) 530 531 def process_line_echo(self, line): 532 return [line.rstrip()] 533 534 def process_line_posix(self, line): 535 self.current_line = line.rstrip() 536 # Unsymbolicated: 537 # #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) 538 # Partially symbolicated: 539 # #0 0x7f6e35cf2e45 in foo (foo.so+0x11fe45) 540 # NOTE: We have to very liberal with symbol 541 # names in the regex because it could be an 542 # Objective-C or C++ demangled name. 543 stack_trace_line_format = ( 544 r"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)" 545 ) 546 match = re.match(stack_trace_line_format, line) 547 if not match: 548 logging.debug('Line "{}" does not match regex'.format(line)) 549 # Not a frame line so don't increment the frame counter. 550 return self.get_symbolized_lines(None, inc_frame_counter=False) 551 logging.debug(line) 552 _, frameno_str, addr, binary, offset = match.groups() 553 554 if not self.using_module_map and not os.path.isabs(binary): 555 # Do not try to symbolicate if the binary is just the module file name 556 # and a module map is unavailable. 557 # FIXME(dliew): This is currently necessary for reports on Darwin that are 558 # partially symbolicated by `atos`. 559 return self.get_symbolized_lines(None) 560 arch = "" 561 # Arch can be embedded in the filename, e.g.: "libabc.dylib:x86_64h" 562 colon_pos = binary.rfind(":") 563 if colon_pos != -1: 564 maybe_arch = binary[colon_pos + 1 :] 565 if is_valid_arch(maybe_arch): 566 arch = maybe_arch 567 binary = binary[0:colon_pos] 568 if arch == "": 569 arch = guess_arch(addr) 570 if frameno_str == "0": 571 # Assume that frame #0 is the first frame of new stack trace. 572 self.frame_no = 0 573 original_binary = binary 574 binary = self.plugin_proxy.filter_binary_path(binary) 575 if binary is None: 576 # The binary filter has told us this binary can't be symbolized. 577 logging.debug('Skipping symbolication of binary "%s"', original_binary) 578 return self.get_symbolized_lines(None) 579 symbolized_line = self.symbolize_address(addr, binary, offset, arch) 580 if not symbolized_line: 581 if original_binary != binary: 582 symbolized_line = self.symbolize_address( 583 addr, original_binary, offset, arch 584 ) 585 return self.get_symbolized_lines(symbolized_line) 586 587 588class AsanSymbolizerPlugInProxy(object): 589 """ 590 Serves several purposes: 591 - Manages the lifetime of plugins (must be used a `with` statement). 592 - Provides interface for calling into plugins from within this script. 593 """ 594 595 def __init__(self): 596 self._plugins = [] 597 self._plugin_names = set() 598 599 def _load_plugin_from_file_impl_py_gt_2(self, file_path, globals_space): 600 with open(file_path, "r") as f: 601 exec(f.read(), globals_space, None) 602 603 def load_plugin_from_file(self, file_path): 604 logging.info('Loading plugins from "{}"'.format(file_path)) 605 globals_space = dict(globals()) 606 # Provide function to register plugins 607 def register_plugin(plugin): 608 logging.info("Registering plugin %s", plugin.get_name()) 609 self.add_plugin(plugin) 610 611 globals_space["register_plugin"] = register_plugin 612 if sys.version_info.major < 3: 613 execfile(file_path, globals_space, None) 614 else: 615 # Indirection here is to avoid a bug in older Python 2 versions: 616 # `SyntaxError: unqualified exec is not allowed in function ...` 617 self._load_plugin_from_file_impl_py_gt_2(file_path, globals_space) 618 619 def add_plugin(self, plugin): 620 assert isinstance(plugin, AsanSymbolizerPlugIn) 621 self._plugins.append(plugin) 622 self._plugin_names.add(plugin.get_name()) 623 plugin._receive_proxy(self) 624 625 def remove_plugin(self, plugin): 626 assert isinstance(plugin, AsanSymbolizerPlugIn) 627 self._plugins.remove(plugin) 628 self._plugin_names.remove(plugin.get_name()) 629 logging.debug("Removing plugin %s", plugin.get_name()) 630 plugin.destroy() 631 632 def has_plugin(self, name): 633 """ 634 Returns true iff the plugin name is currently 635 being managed by AsanSymbolizerPlugInProxy. 636 """ 637 return name in self._plugin_names 638 639 def register_cmdline_args(self, parser): 640 plugins = list(self._plugins) 641 for plugin in plugins: 642 plugin.register_cmdline_args(parser) 643 644 def process_cmdline_args(self, pargs): 645 # Use copy so we can remove items as we iterate. 646 plugins = list(self._plugins) 647 for plugin in plugins: 648 keep = plugin.process_cmdline_args(pargs) 649 assert isinstance(keep, bool) 650 if not keep: 651 self.remove_plugin(plugin) 652 653 def __enter__(self): 654 return self 655 656 def __exit__(self, exc_type, exc_val, exc_tb): 657 for plugin in self._plugins: 658 plugin.destroy() 659 # Don't suppress raised exceptions 660 return False 661 662 def _filter_single_value(self, function_name, input_value): 663 """ 664 Helper for filter style plugin functions. 665 """ 666 new_value = input_value 667 for plugin in self._plugins: 668 result = getattr(plugin, function_name)(new_value) 669 if result is None: 670 return None 671 new_value = result 672 return new_value 673 674 def filter_binary_path(self, binary_path): 675 """ 676 Consult available plugins to filter the path to a binary 677 to make it suitable for symbolication. 678 679 Returns `None` if symbolication should not be attempted for this 680 binary. 681 """ 682 return self._filter_single_value("filter_binary_path", binary_path) 683 684 def filter_module_desc(self, module_desc): 685 """ 686 Consult available plugins to determine the module 687 description suitable for symbolication. 688 689 Returns `None` if symbolication should not be attempted for this module. 690 """ 691 assert isinstance(module_desc, ModuleDesc) 692 return self._filter_single_value("filter_module_desc", module_desc) 693 694 695class AsanSymbolizerPlugIn(object): 696 """ 697 This is the interface the `asan_symbolize.py` code uses to talk 698 to plugins. 699 """ 700 701 @classmethod 702 def get_name(cls): 703 """ 704 Returns the name of the plugin. 705 """ 706 return cls.__name__ 707 708 def _receive_proxy(self, proxy): 709 assert isinstance(proxy, AsanSymbolizerPlugInProxy) 710 self.proxy = proxy 711 712 def register_cmdline_args(self, parser): 713 """ 714 Hook for registering command line arguments to be 715 consumed in `process_cmdline_args()`. 716 717 `parser` - Instance of `argparse.ArgumentParser`. 718 """ 719 pass 720 721 def process_cmdline_args(self, pargs): 722 """ 723 Hook for handling parsed arguments. Implementations 724 should not modify `pargs`. 725 726 `pargs` - Instance of `argparse.Namespace` containing 727 parsed command line arguments. 728 729 Return `True` if plug-in should be used, otherwise 730 return `False`. 731 """ 732 return True 733 734 def destroy(self): 735 """ 736 Hook called when a plugin is about to be destroyed. 737 Implementations should free any allocated resources here. 738 """ 739 pass 740 741 # Symbolization hooks 742 def filter_binary_path(self, binary_path): 743 """ 744 Given a binary path return a binary path suitable for symbolication. 745 746 Implementations should return `None` if symbolication of this binary 747 should be skipped. 748 """ 749 return binary_path 750 751 def filter_module_desc(self, module_desc): 752 """ 753 Given a ModuleDesc object (`module_desc`) return 754 a ModuleDesc suitable for symbolication. 755 756 Implementations should return `None` if symbolication of this binary 757 should be skipped. 758 """ 759 return module_desc 760 761 762class ModuleDesc(object): 763 def __init__(self, name, arch, start_addr, end_addr, module_path, uuid): 764 self.name = name 765 self.arch = arch 766 self.start_addr = start_addr 767 self.end_addr = end_addr 768 # Module path from an ASan report. 769 self.module_path = module_path 770 # Module for performing symbolization, by default same as above. 771 self.module_path_for_symbolization = module_path 772 self.uuid = uuid 773 assert self.is_valid() 774 775 def __str__(self): 776 assert self.is_valid() 777 return "{name} {arch} {start_addr:#016x}-{end_addr:#016x} {module_path} {uuid}".format( 778 name=self.name, 779 arch=self.arch, 780 start_addr=self.start_addr, 781 end_addr=self.end_addr, 782 module_path=self.module_path 783 if self.module_path == self.module_path_for_symbolization 784 else "{} ({})".format(self.module_path_for_symbolization, self.module_path), 785 uuid=self.uuid, 786 ) 787 788 def is_valid(self): 789 if not isinstance(self.name, str): 790 return False 791 if not isinstance(self.arch, str): 792 return False 793 if not isinstance(self.start_addr, int): 794 return False 795 if self.start_addr < 0: 796 return False 797 if not isinstance(self.end_addr, int): 798 return False 799 if self.end_addr <= self.start_addr: 800 return False 801 if not isinstance(self.module_path, str): 802 return False 803 if not os.path.isabs(self.module_path): 804 return False 805 if not isinstance(self.module_path_for_symbolization, str): 806 return False 807 if not os.path.isabs(self.module_path_for_symbolization): 808 return False 809 if not isinstance(self.uuid, str): 810 return False 811 return True 812 813 814class GetUUIDFromBinaryException(Exception): 815 def __init__(self, msg): 816 super(GetUUIDFromBinaryException, self).__init__(msg) 817 818 819_get_uuid_from_binary_cache = dict() 820 821 822def get_uuid_from_binary(path_to_binary, arch=None): 823 cache_key = (path_to_binary, arch) 824 cached_value = _get_uuid_from_binary_cache.get(cache_key) 825 if cached_value: 826 return cached_value 827 if not os.path.exists(path_to_binary): 828 raise GetUUIDFromBinaryException( 829 'Binary "{}" does not exist'.format(path_to_binary) 830 ) 831 cmd = ["/usr/bin/otool", "-l"] 832 if arch: 833 cmd.extend(["-arch", arch]) 834 cmd.append(path_to_binary) 835 output = subprocess.check_output(cmd, stderr=subprocess.STDOUT) 836 # Look for this output: 837 # cmd LC_UUID 838 # cmdsize 24 839 # uuid 4CA778FE-5BF9-3C45-AE59-7DF01B2BE83F 840 if isinstance(output, str): 841 output_str = output 842 else: 843 assert isinstance(output, bytes) 844 output_str = output.decode() 845 assert isinstance(output_str, str) 846 lines = output_str.split("\n") 847 uuid = None 848 for index, line in enumerate(lines): 849 stripped_line = line.strip() 850 if not stripped_line.startswith("cmd LC_UUID"): 851 continue 852 uuid_line = lines[index + 2].strip() 853 if not uuid_line.startswith("uuid"): 854 raise GetUUIDFromBinaryException('Malformed output: "{}"'.format(uuid_line)) 855 split_uuid_line = uuid_line.split() 856 uuid = split_uuid_line[1] 857 break 858 if uuid is None: 859 logging.error("Failed to retrieve UUID from binary {}".format(path_to_binary)) 860 logging.error("otool output was:\n{}".format(output_str)) 861 raise GetUUIDFromBinaryException( 862 'Failed to retrieve UUID from binary "{}"'.format(path_to_binary) 863 ) 864 else: 865 # Update cache 866 _get_uuid_from_binary_cache[cache_key] = uuid 867 return uuid 868 869 870class ModuleMap(object): 871 def __init__(self): 872 self._module_name_to_description_map = dict() 873 874 def add_module(self, desc): 875 assert isinstance(desc, ModuleDesc) 876 assert desc.name not in self._module_name_to_description_map 877 self._module_name_to_description_map[desc.name] = desc 878 879 def find_module_by_name(self, name): 880 return self._module_name_to_description_map.get(name, None) 881 882 def __str__(self): 883 s = "{} modules:\n".format(self.num_modules) 884 for module_desc in sorted( 885 self._module_name_to_description_map.values(), key=lambda v: v.start_addr 886 ): 887 s += str(module_desc) + "\n" 888 return s 889 890 @property 891 def num_modules(self): 892 return len(self._module_name_to_description_map) 893 894 @property 895 def modules(self): 896 return set(self._module_name_to_description_map.values()) 897 898 def get_module_path_for_symbolication(self, module_name, proxy, validate_uuid): 899 module_desc = self.find_module_by_name(module_name) 900 if module_desc is None: 901 return None 902 # Allow a plug-in to change the module description to make it 903 # suitable for symbolication or avoid symbolication altogether. 904 module_desc = proxy.filter_module_desc(module_desc) 905 if module_desc is None: 906 return None 907 if validate_uuid: 908 logging.debug( 909 "Validating UUID of {}".format( 910 module_desc.module_path_for_symbolization 911 ) 912 ) 913 try: 914 uuid = get_uuid_from_binary( 915 module_desc.module_path_for_symbolization, arch=module_desc.arch 916 ) 917 if uuid != module_desc.uuid: 918 logging.warning( 919 "Detected UUID mismatch {} != {}".format(uuid, module_desc.uuid) 920 ) 921 # UUIDs don't match. Tell client to not symbolize this. 922 return None 923 except GetUUIDFromBinaryException as e: 924 logging.error("Failed to get binary from UUID: %s", str(e)) 925 return None 926 else: 927 logging.warning( 928 "Skipping validation of UUID of {}".format( 929 module_desc.module_path_for_symbolization 930 ) 931 ) 932 return module_desc.module_path_for_symbolization 933 934 @staticmethod 935 def parse_from_file(module_map_path): 936 if not os.path.exists(module_map_path): 937 raise Exception('module map "{}" does not exist'.format(module_map_path)) 938 with open(module_map_path, "r") as f: 939 mm = None 940 # E.g. 941 # 0x2db4000-0x102ddc000 /path/to (arm64) <0D6BBDE0-FF90-3680-899D-8E6F9528E04C> 942 hex_regex = lambda name: r"0x(?P<" + name + r">[0-9a-f]+)" 943 module_path_regex = r"(?P<path>.+)" 944 arch_regex = r"\((?P<arch>.+)\)" 945 uuid_regex = r"<(?P<uuid>[0-9A-Z-]+)>" 946 line_regex = r"^{}-{}\s+{}\s+{}\s+{}".format( 947 hex_regex("start_addr"), 948 hex_regex("end_addr"), 949 module_path_regex, 950 arch_regex, 951 uuid_regex, 952 ) 953 matcher = re.compile(line_regex) 954 line_num = 0 955 line = "dummy" 956 while line != "": 957 line = f.readline() 958 line_num += 1 959 if mm is None: 960 if line.startswith("Process module map:"): 961 mm = ModuleMap() 962 continue 963 if line.startswith("End of module map"): 964 break 965 m_obj = matcher.match(line) 966 if not m_obj: 967 raise Exception( 968 'Failed to parse line {} "{}"'.format(line_num, line) 969 ) 970 arch = m_obj.group("arch") 971 start_addr = int(m_obj.group("start_addr"), base=16) 972 end_addr = int(m_obj.group("end_addr"), base=16) 973 module_path = m_obj.group("path") 974 uuid = m_obj.group("uuid") 975 module_desc = ModuleDesc( 976 name=os.path.basename(module_path), 977 arch=arch, 978 start_addr=start_addr, 979 end_addr=end_addr, 980 module_path=module_path, 981 uuid=uuid, 982 ) 983 mm.add_module(module_desc) 984 if mm is not None: 985 logging.debug( 986 'Loaded Module map from "{}":\n{}'.format(f.name, str(mm)) 987 ) 988 return mm 989 990 991class SysRootFilterPlugIn(AsanSymbolizerPlugIn): 992 """ 993 Simple plug-in to add sys root prefix to all binary paths 994 used for symbolication. 995 """ 996 997 def __init__(self): 998 self.sysroot_path = "" 999 1000 def register_cmdline_args(self, parser): 1001 parser.add_argument( 1002 "-s", 1003 dest="sys_root", 1004 metavar="SYSROOT", 1005 help="set path to sysroot for sanitized binaries", 1006 ) 1007 1008 def process_cmdline_args(self, pargs): 1009 if pargs.sys_root is None: 1010 # Not being used so remove ourselves. 1011 return False 1012 self.sysroot_path = pargs.sys_root 1013 return True 1014 1015 def filter_binary_path(self, path): 1016 return self.sysroot_path + path 1017 1018 1019class ModuleMapPlugIn(AsanSymbolizerPlugIn): 1020 def __init__(self): 1021 self._module_map = None 1022 self._uuid_validation = True 1023 1024 def register_cmdline_args(self, parser): 1025 parser.add_argument( 1026 "--module-map", 1027 help="Path to text file containing module map" 1028 "output. See print_module_map ASan option.", 1029 ) 1030 parser.add_argument( 1031 "--skip-uuid-validation", 1032 default=False, 1033 action="store_true", 1034 help="Skips validating UUID of modules using otool.", 1035 ) 1036 1037 def process_cmdline_args(self, pargs): 1038 if not pargs.module_map: 1039 return False 1040 self._module_map = ModuleMap.parse_from_file(args.module_map) 1041 if self._module_map is None: 1042 msg = "Failed to find module map" 1043 logging.error(msg) 1044 raise Exception(msg) 1045 self._uuid_validation = not pargs.skip_uuid_validation 1046 return True 1047 1048 def filter_binary_path(self, binary_path): 1049 if os.path.isabs(binary_path): 1050 # This is a binary path so transform into 1051 # a module name 1052 module_name = os.path.basename(binary_path) 1053 else: 1054 module_name = binary_path 1055 return self._module_map.get_module_path_for_symbolication( 1056 module_name, self.proxy, self._uuid_validation 1057 ) 1058 1059 1060def add_logging_args(parser): 1061 parser.add_argument( 1062 "--log-dest", 1063 default=None, 1064 help="Destination path for script logging (default stderr).", 1065 ) 1066 parser.add_argument( 1067 "--log-level", 1068 choices=["debug", "info", "warning", "error", "critical"], 1069 default="info", 1070 help="Log level for script (default: %(default)s).", 1071 ) 1072 1073 1074def setup_logging(): 1075 # Set up a parser just for parsing the logging arguments. 1076 # This is necessary because logging should be configured before we 1077 # perform the main argument parsing. 1078 parser = argparse.ArgumentParser(add_help=False) 1079 add_logging_args(parser) 1080 pargs, unparsed_args = parser.parse_known_args() 1081 1082 log_level = getattr(logging, pargs.log_level.upper()) 1083 if log_level == logging.DEBUG: 1084 log_format = ( 1085 "%(levelname)s: [%(funcName)s() %(filename)s:%(lineno)d] %(message)s" 1086 ) 1087 else: 1088 log_format = "%(levelname)s: %(message)s" 1089 basic_config = {"level": log_level, "format": log_format} 1090 log_dest = pargs.log_dest 1091 if log_dest: 1092 basic_config["filename"] = log_dest 1093 logging.basicConfig(**basic_config) 1094 logging.debug( 1095 'Logging level set to "{}" and directing output to "{}"'.format( 1096 pargs.log_level, "stderr" if log_dest is None else log_dest 1097 ) 1098 ) 1099 return unparsed_args 1100 1101 1102def add_load_plugin_args(parser): 1103 parser.add_argument("-p", "--plugins", help="Load plug-in", nargs="+", default=[]) 1104 1105 1106def setup_plugins(plugin_proxy, args): 1107 parser = argparse.ArgumentParser(add_help=False) 1108 add_load_plugin_args(parser) 1109 pargs, unparsed_args = parser.parse_known_args() 1110 for plugin_path in pargs.plugins: 1111 plugin_proxy.load_plugin_from_file(plugin_path) 1112 # Add built-in plugins. 1113 plugin_proxy.add_plugin(ModuleMapPlugIn()) 1114 plugin_proxy.add_plugin(SysRootFilterPlugIn()) 1115 return unparsed_args 1116 1117 1118if __name__ == "__main__": 1119 remaining_args = setup_logging() 1120 with AsanSymbolizerPlugInProxy() as plugin_proxy: 1121 remaining_args = setup_plugins(plugin_proxy, remaining_args) 1122 parser = argparse.ArgumentParser( 1123 formatter_class=argparse.RawDescriptionHelpFormatter, 1124 description="ASan symbolization script", 1125 epilog=__doc__, 1126 ) 1127 parser.add_argument( 1128 "path_to_cut", 1129 nargs="*", 1130 help="pattern to be cut from the result file path ", 1131 ) 1132 parser.add_argument( 1133 "-d", "--demangle", action="store_true", help="demangle function names" 1134 ) 1135 parser.add_argument( 1136 "-c", metavar="CROSS_COMPILE", help="set prefix for binutils" 1137 ) 1138 parser.add_argument( 1139 "-l", 1140 "--logfile", 1141 default=sys.stdin, 1142 type=argparse.FileType("r"), 1143 help="set log file name to parse, default is stdin", 1144 ) 1145 parser.add_argument( 1146 "--force-system-symbolizer", 1147 action="store_true", 1148 help="don't use llvm-symbolizer", 1149 ) 1150 # Add logging arguments so that `--help` shows them. 1151 add_logging_args(parser) 1152 # Add load plugin arguments so that `--help` shows them. 1153 add_load_plugin_args(parser) 1154 plugin_proxy.register_cmdline_args(parser) 1155 args = parser.parse_args(remaining_args) 1156 plugin_proxy.process_cmdline_args(args) 1157 if args.path_to_cut: 1158 fix_filename_patterns = args.path_to_cut 1159 if args.demangle: 1160 demangle = True 1161 if args.c: 1162 binutils_prefix = args.c 1163 if args.logfile: 1164 logfile = args.logfile 1165 else: 1166 logfile = sys.stdin 1167 if args.force_system_symbolizer: 1168 force_system_symbolizer = True 1169 if force_system_symbolizer: 1170 assert allow_system_symbolizer 1171 loop = SymbolizationLoop(plugin_proxy) 1172 loop.process_logfile() 1173