#!/usr/bin/env python3

"""
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the resulting profile to the output file.
"""

import argparse
import subprocess
import sys
import re

# Regexes to parse FDATA, pre-aggregated and no-LBR profile data.

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
# [<mispred_count>]
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")

# Field separator: spaces not preceded by a backslash (negative lookbehind),
# so that escaped spaces may appear inside symbol/DSO names.
field_sep_pat = re.compile(r"(?<!\\) +")

# Symbol map (name -> hexadecimal value string as printed by nm). It is filled
# in place by main() and read by evaluate_symbol() and replace_symbol().
symbols = {}


def parse_args(argv=None):
    """
    Parse the command line (sys.argv[1:] when argv is None) and return the
    resulting argparse namespace.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input")
    parser.add_argument("objfile", help="Object file to extract symbol values from")
    parser.add_argument("output")
    parser.add_argument(
        "prefix", nargs="?", default="FDATA", help="Custom FDATA prefix"
    )
    parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
    parser.add_argument("--no-lbr", action="store_true")
    parser.add_argument("--no-redefine", action="store_true")
    return parser.parse_args(argv)


def parse_directive(profile_line, line=None):
    """
    Classify one profile directive and split it into its fields.

    profile_line is the text following the "# <prefix>: " marker; line is the
    complete input line, used only in error messages (defaults to
    profile_line).

    Returns an (etype, fields) tuple where etype is one of:
      "FDATA":  fields = (issym1, anchor1, offsym1, issym2, anchor2, offsym2,
                          mispred_count, exec_count)
      "NOLBR":  fields = (issym, anchor, offsym, count)
      "PREAGG": fields = (type, offsets_and_counts)

    Terminates the script with an error message if the directive matches none
    of the formats.
    """
    if line is None:
        line = profile_line

    fdata_match = fdata_pat.match(profile_line)
    if fdata_match:
        src_dst, mispred, execnt = fdata_match.groups()
        chunks = field_sep_pat.split(src_dst)
        # Only accept the record as FDATA if the number of records separated
        # by non-escaped whitespace exactly matches the format. A no-LBR record
        # with a numeric offset ("1 main 10 5") also matches fdata_pat, so a
        # mismatch here must fall through to the no-LBR check below instead of
        # being reported as an error.
        if len(chunks) == 6:
            return ("FDATA", (*chunks, mispred, execnt))

    nolbr_match = nolbr_pat.match(profile_line)
    if nolbr_match:
        loc, count = nolbr_match.groups()
        chunks = field_sep_pat.split(loc)
        if len(chunks) == 3:
            return ("NOLBR", (*chunks, count))
        # The line starts like an FDATA/no-LBR record but has the wrong number
        # of fields for both formats.
        sys.exit(f"ERROR: wrong format/whitespaces must be escaped:\n{line}")

    preagg_match = preagg_pat.match(profile_line)
    if preagg_match:
        return ("PREAGG", preagg_match.groups())

    sys.exit("ERROR: unexpected input:\n%s" % line)


def read_exprs(input_path, prefix):
    """
    Read input_path and return the list of parsed directives, one
    (etype, fields) tuple (see parse_directive) per "# <prefix>: " line.
    Lines without the marker are ignored.
    """
    # The prefix is interpolated into the pattern verbatim.
    prefix_pat = re.compile(f"^# {prefix}: (.*)")
    exprs = []
    with open(input_path, "r") as f:
        for line in f:
            prefix_match = prefix_pat.match(line)
            if not prefix_match:
                continue
            exprs.append(parse_directive(prefix_match.group(1), line))
    return exprs


def parse_nm_output(nm_output, no_redefine=False):
    """
    Build a symbol map from nm output lines of the form
    <symbol value> <symbol type> <symbol name>.

    When a name occurs more than once, the last definition wins unless
    no_redefine is set, in which case the first one is kept. Lines that do not
    have all three fields (e.g. blank lines) are skipped.
    """
    result = {}
    for symline in nm_output.splitlines():
        fields = symline.split(maxsplit=2)
        if len(fields) != 3:
            continue
        symval, _, symname = fields
        if symname in result and no_redefine:
            continue
        result[symname] = symval
    return result


def read_symbols(nmtool, objfile, no_redefine=False):
    """
    Run `nmtool --defined-only objfile` and return the parsed symbol map.
    A non-zero exit status of the tool is reported on stderr; whatever the
    tool printed to stdout is still parsed.
    """
    nm_result = subprocess.run(
        [nmtool, "--defined-only", objfile], text=True, capture_output=True
    )
    if nm_result.returncode != 0:
        print(
            f"WARNING: {nmtool} exited with status {nm_result.returncode}:\n"
            f"{nm_result.stderr}",
            file=sys.stderr,
        )
    return parse_nm_output(nm_result.stdout, no_redefine)


def evaluate_symbol(issym, anchor, offsym):
    """
    Return the "<issym> <anchor> <offset>" string for one profile location.

    If offsym is not of the "#name#" form it is returned unchanged. Otherwise
    it is replaced by the symbol value when issym is "0", or by the
    hexadecimal offset of the symbol from its anchor symbol otherwise.
    Terminates the script if a referenced symbol is not in the symbol map.
    """
    sym_match = replace_pat.match(offsym)
    if not sym_match:
        # No need to evaluate symbol value, return as is
        return f"{issym} {anchor} {offsym}"
    symname = sym_match.group("symname")
    if symname not in symbols:
        sys.exit(f"ERROR: symbol {symname} is not defined in binary")
    # Evaluate to an absolute offset if issym is false
    if issym == "0":
        return f"{issym} {anchor} {symbols[symname]}"
    # Evaluate symbol against its anchor if issym is true
    if anchor not in symbols:
        sys.exit(f"ERROR: symbol {anchor} is not defined in binary")
    anchor_value = int(symbols[anchor], 16)
    symbol_value = int(symbols[symname], 16)
    sym_offset = symbol_value - anchor_value
    return f'{issym} {anchor} {format(sym_offset, "x")}'


def replace_symbol(matchobj):
    """
    Substitution callback for replace_pat: return the value of the symbol
    captured in the "symname" group of matchobj.
    Terminates the script if the symbol is not in the symbol map.
    """
    symname = matchobj.group("symname")
    if symname not in symbols:
        sys.exit(f"ERROR: symbol {symname} is not defined in binary")
    return symbols[symname]


def write_profile(exprs, output_path, no_lbr=False):
    """
    Write the evaluated directives to output_path, one record per line,
    preceded by a "no_lbr" header line when no_lbr is set.
    """
    with open(output_path, "w", newline="\n") as f:
        if no_lbr:
            print("no_lbr", file=f)
        for etype, expr in exprs:
            if etype == "FDATA":
                issym1, anchor1, offsym1, issym2, anchor2, offsym2 = expr[:6]
                mispred, execnt = expr[6:]
                print(
                    evaluate_symbol(issym1, anchor1, offsym1),
                    evaluate_symbol(issym2, anchor2, offsym2),
                    mispred,
                    execnt,
                    file=f,
                )
            elif etype == "NOLBR":
                issym, anchor, offsym, count = expr
                print(evaluate_symbol(issym, anchor, offsym), count, file=f)
            elif etype == "PREAGG":
                # Replace all symbols enclosed in ##
                print(expr[0], replace_pat.sub(replace_symbol, expr[1]), file=f)
            else:
                sys.exit("ERROR: unhandled expression type:\n%s" % etype)


def main(argv=None):
    """
    Script entry point: parse the input, read symbol values from the object
    file and write the resulting profile.
    """
    args = parse_args(argv)
    # The input is parsed first so that malformed directives are reported
    # before the nm tool is run.
    exprs = read_exprs(args.input, args.prefix)
    # Update in place: evaluate_symbol/replace_symbol read the module-level map.
    symbols.update(read_symbols(args.nmtool, args.objfile, args.no_redefine))
    write_profile(exprs, args.output, args.no_lbr)


if __name__ == "__main__":
    main()