13a16f216SAmir Ayupov#!/usr/bin/env python3 23a16f216SAmir Ayupov 33a16f216SAmir Ayupov""" 43a16f216SAmir AyupovThis script reads the input from stdin, extracts all lines starting with 53a16f216SAmir Ayupov"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives, 63a16f216SAmir Ayupovreplaces symbol names ("#name#") with either symbol values or with offsets from 73a16f216SAmir Ayupovrespective anchor symbols, and prints the resulting file to stdout. 83a16f216SAmir Ayupov""" 93a16f216SAmir Ayupov 103a16f216SAmir Ayupovimport argparse 113a16f216SAmir Ayupovimport subprocess 123a16f216SAmir Ayupovimport sys 133a16f216SAmir Ayupovimport re 143a16f216SAmir Ayupov 153a16f216SAmir Ayupovparser = argparse.ArgumentParser() 163a16f216SAmir Ayupovparser.add_argument("input") 173a16f216SAmir Ayupovparser.add_argument("objfile", help="Object file to extract symbol values from") 183a16f216SAmir Ayupovparser.add_argument("output") 193a16f216SAmir Ayupovparser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix") 203a16f216SAmir Ayupovparser.add_argument("--nmtool", default="nm", help="Path to nm tool") 21f98ee40fSTobias Hietaparser.add_argument("--no-lbr", action="store_true") 22*97025bd9SAmir Ayupovparser.add_argument("--no-redefine", action="store_true") 233a16f216SAmir Ayupov 243a16f216SAmir Ayupovargs = parser.parse_args() 253a16f216SAmir Ayupov 263a16f216SAmir Ayupov# Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated 273a16f216SAmir Ayupov# profile data 283a16f216SAmir Ayupovprefix_pat = re.compile(f"^# {args.prefix}: (.*)") 293a16f216SAmir Ayupov 303a16f216SAmir Ayupov# FDATA records: 313a16f216SAmir Ayupov# <is symbol?> <closest elf symbol or DSO name> <relative FROM address> 323a16f216SAmir Ayupov# <is symbol?> <closest elf symbol or DSO name> <relative TO address> 333a16f216SAmir Ayupov# <number of mispredictions> <number of branches> 343a16f216SAmir Ayupovfdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)") 353a16f216SAmir Ayupov 363a16f216SAmir Ayupov# Pre-aggregated profile: 373a16f216SAmir Ayupov# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count> 383a16f216SAmir Ayupov# [<mispred_count>] 393a16f216SAmir Ayupovpreagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)") 403a16f216SAmir Ayupov 416b05a62aSAmir Ayupov# No-LBR profile: 426b05a62aSAmir Ayupov# <is symbol?> <closest elf symbol or DSO name> <relative address> <count> 436b05a62aSAmir Ayupovnolbr_pat = re.compile(r"([01].*) (?P<count>\d+)") 446b05a62aSAmir Ayupov 453a16f216SAmir Ayupov# Replacement symbol: #symname# 463a16f216SAmir Ayupovreplace_pat = re.compile(r"#(?P<symname>[^#]+)#") 473a16f216SAmir Ayupov 483a16f216SAmir Ayupov# Read input and construct the representation of fdata expressions 493a16f216SAmir Ayupov# as (src_tuple, dst_tuple, mispred_count, exec_count) tuples, where src and dst 503a16f216SAmir Ayupov# are represented as (is_sym, anchor, offset) tuples 513a16f216SAmir Ayupovexprs = [] 52f98ee40fSTobias Hietawith open(args.input, "r") as f: 533a16f216SAmir Ayupov for line in f.readlines(): 543a16f216SAmir Ayupov prefix_match = prefix_pat.match(line) 553a16f216SAmir Ayupov if not prefix_match: 563a16f216SAmir Ayupov continue 573a16f216SAmir Ayupov profile_line = prefix_match.group(1) 583a16f216SAmir Ayupov fdata_match = fdata_pat.match(profile_line) 593a16f216SAmir Ayupov preagg_match = preagg_pat.match(profile_line) 606b05a62aSAmir Ayupov nolbr_match = nolbr_pat.match(profile_line) 613a16f216SAmir Ayupov if fdata_match: 623a16f216SAmir Ayupov src_dst, execnt, mispred = fdata_match.groups() 633a16f216SAmir Ayupov # Split by whitespaces not preceded by a backslash (negative lookbehind) 64f98ee40fSTobias Hieta chunks = re.split(r"(?<!\\) +", src_dst) 653a16f216SAmir Ayupov # Check if the number of records separated by non-escaped whitespace 663a16f216SAmir Ayupov # exactly matches the format. 67f98ee40fSTobias Hieta assert ( 68f98ee40fSTobias Hieta len(chunks) == 6 69f98ee40fSTobias Hieta ), f"ERROR: wrong format/whitespaces must be escaped:\n{line}" 70f98ee40fSTobias Hieta exprs.append(("FDATA", (*chunks, execnt, mispred))) 716b05a62aSAmir Ayupov elif nolbr_match: 726b05a62aSAmir Ayupov loc, count = nolbr_match.groups() 736b05a62aSAmir Ayupov # Split by whitespaces not preceded by a backslash (negative lookbehind) 74f98ee40fSTobias Hieta chunks = re.split(r"(?<!\\) +", loc) 756b05a62aSAmir Ayupov # Check if the number of records separated by non-escaped whitespace 766b05a62aSAmir Ayupov # exactly matches the format. 77f98ee40fSTobias Hieta assert ( 78f98ee40fSTobias Hieta len(chunks) == 3 79f98ee40fSTobias Hieta ), f"ERROR: wrong format/whitespaces must be escaped:\n{line}" 80f98ee40fSTobias Hieta exprs.append(("NOLBR", (*chunks, count))) 813a16f216SAmir Ayupov elif preagg_match: 82f98ee40fSTobias Hieta exprs.append(("PREAGG", preagg_match.groups())) 833a16f216SAmir Ayupov else: 843a16f216SAmir Ayupov exit("ERROR: unexpected input:\n%s" % line) 853a16f216SAmir Ayupov 863a16f216SAmir Ayupov# Read nm output: <symbol value> <symbol type> <symbol name> 87f98ee40fSTobias Hietanm_output = subprocess.run( 88f98ee40fSTobias Hieta [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True 89f98ee40fSTobias Hieta).stdout 903a16f216SAmir Ayupov# Populate symbol map 913a16f216SAmir Ayupovsymbols = {} 923a16f216SAmir Ayupovfor symline in nm_output.splitlines(): 933a16f216SAmir Ayupov symval, _, symname = symline.split(maxsplit=2) 94*97025bd9SAmir Ayupov if symname in symbols and args.no_redefine: 95*97025bd9SAmir Ayupov continue 963a16f216SAmir Ayupov symbols[symname] = symval 973a16f216SAmir Ayupov 98f98ee40fSTobias Hieta 993a16f216SAmir Ayupovdef evaluate_symbol(issym, anchor, offsym): 1003a16f216SAmir Ayupov sym_match = replace_pat.match(offsym) 1013a16f216SAmir Ayupov if not sym_match: 1023a16f216SAmir Ayupov # No need to evaluate symbol value, return as is 103f98ee40fSTobias Hieta return f"{issym} {anchor} {offsym}" 104f98ee40fSTobias Hieta symname = sym_match.group("symname") 1053a16f216SAmir Ayupov assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary" 1063a16f216SAmir Ayupov # Evaluate to an absolute offset if issym is false 107f98ee40fSTobias Hieta if issym == "0": 108f98ee40fSTobias Hieta return f"{issym} {anchor} {symbols[symname]}" 1093a16f216SAmir Ayupov # Evaluate symbol against its anchor if issym is true 1103a16f216SAmir Ayupov assert anchor in symbols, f"ERROR: symbol {anchor} is not defined in binary" 1113a16f216SAmir Ayupov anchor_value = int(symbols[anchor], 16) 1123a16f216SAmir Ayupov symbol_value = int(symbols[symname], 16) 1133a16f216SAmir Ayupov sym_offset = symbol_value - anchor_value 1143a16f216SAmir Ayupov return f'{issym} {anchor} {format(sym_offset, "x")}' 1153a16f216SAmir Ayupov 116f98ee40fSTobias Hieta 1173a16f216SAmir Ayupovdef replace_symbol(matchobj): 118f98ee40fSTobias Hieta """ 1193a16f216SAmir Ayupov Expects matchobj to only capture one group which contains the symbol name. 120f98ee40fSTobias Hieta """ 121f98ee40fSTobias Hieta symname = matchobj.group("symname") 1223a16f216SAmir Ayupov assert symname in symbols, f"ERROR: symbol {symname} is not defined in binary" 1233a16f216SAmir Ayupov return symbols[symname] 1243a16f216SAmir Ayupov 125f98ee40fSTobias Hieta 126f98ee40fSTobias Hietawith open(args.output, "w", newline="\n") as f: 1276b05a62aSAmir Ayupov if args.no_lbr: 128f98ee40fSTobias Hieta print("no_lbr", file=f) 1293a16f216SAmir Ayupov for etype, expr in exprs: 130f98ee40fSTobias Hieta if etype == "FDATA": 1313a16f216SAmir Ayupov issym1, anchor1, offsym1, issym2, anchor2, offsym2, execnt, mispred = expr 132f98ee40fSTobias Hieta print( 133f98ee40fSTobias Hieta evaluate_symbol(issym1, anchor1, offsym1), 1343a16f216SAmir Ayupov evaluate_symbol(issym2, anchor2, offsym2), 135f98ee40fSTobias Hieta execnt, 136f98ee40fSTobias Hieta mispred, 137f98ee40fSTobias Hieta file=f, 138f98ee40fSTobias Hieta ) 139f98ee40fSTobias Hieta elif etype == "NOLBR": 1406b05a62aSAmir Ayupov issym, anchor, offsym, count = expr 1416b05a62aSAmir Ayupov print(evaluate_symbol(issym, anchor, offsym), count, file=f) 142f98ee40fSTobias Hieta elif etype == "PREAGG": 1433a16f216SAmir Ayupov # Replace all symbols enclosed in ## 144f98ee40fSTobias Hieta print(expr[0], re.sub(replace_pat, replace_symbol, expr[1]), file=f) 1453a16f216SAmir Ayupov else: 1463a16f216SAmir Ayupov exit("ERROR: unhandled expression type:\n%s" % etype) 147