xref: /llvm-project/bolt/test/link_fdata.py (revision 97025bd9d5b32f984f07d6ae20a3ce6ddb3fbe2a)
#!/usr/bin/env python3

"""
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the resulting profile to the output file.
"""

import argparse
import re
import subprocess
import sys

# Command-line interface: three required paths, an optional custom prefix and
# a few switches controlling symbol lookup and the output header.
parser = argparse.ArgumentParser()
parser.add_argument("input", help="Input file containing the prefixed profile lines")
parser.add_argument("objfile", help="Object file to extract symbol values from")
parser.add_argument("output", help="Path of the profile file to write")
parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
parser.add_argument(
    "--no-lbr",
    action="store_true",
    help='Write a "no_lbr" header line at the top of the output',
)
parser.add_argument(
    "--no-redefine",
    action="store_true",
    help="Keep the first value nm reports for a symbol name instead of "
    "letting later duplicates override it",
)

args = parser.parse_args()

# Regexes to extract FDATA lines from input and parse FDATA and pre-aggregated
# profile data. The prefix comes from the command line, so it is escaped to be
# matched literally rather than interpreted as a regular expression.
prefix_pat = re.compile(f"^# {re.escape(args.prefix)}: (.*)")

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
# Group names follow the record layout above; the groups are consumed
# positionally via .groups().
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
# [<mispred_count>]
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")

# Read input and construct the representation of profile expressions as
# (kind, fields) pairs, where kind selects the layout of fields:
#   "FDATA":  (is_sym, anchor, offset, is_sym, anchor, offset, mispred, branches)
#   "NOLBR":  (is_sym, anchor, offset, count)
#   "PREAGG": (type, offsets_count)
exprs = []
with open(args.input, "r") as f:
    for line in f:
        prefix_match = prefix_pat.match(line)
        if not prefix_match:
            # Not a profile directive; ignore the line.
            continue
        profile_line = prefix_match.group(1)
        fdata_match = fdata_pat.match(profile_line)
        preagg_match = preagg_pat.match(profile_line)
        nolbr_match = nolbr_pat.match(profile_line)
        # FDATA is tested first: any line matching fdata_pat also matches the
        # shorter nolbr_pat.
        if fdata_match:
            src_dst, mispred, branches = fdata_match.groups()
            # Split by whitespaces not preceded by a backslash (negative lookbehind)
            chunks = re.split(r"(?<!\\) +", src_dst)
            # Check if the number of records separated by non-escaped whitespace
            # exactly matches the format. An explicit check is used instead of
            # assert so that it is not stripped under "python -O".
            if len(chunks) != 6:
                sys.exit(f"ERROR: wrong format/whitespaces must be escaped:\n{line}")
            exprs.append(("FDATA", (*chunks, mispred, branches)))
        elif nolbr_match:
            loc, count = nolbr_match.groups()
            # Split by whitespaces not preceded by a backslash (negative lookbehind)
            chunks = re.split(r"(?<!\\) +", loc)
            # Check if the number of records separated by non-escaped whitespace
            # exactly matches the format.
            if len(chunks) != 3:
                sys.exit(f"ERROR: wrong format/whitespaces must be escaped:\n{line}")
            exprs.append(("NOLBR", (*chunks, count)))
        elif preagg_match:
            exprs.append(("PREAGG", preagg_match.groups()))
        else:
            sys.exit("ERROR: unexpected input:\n%s" % line)

# Read nm output: <symbol value> <symbol type> <symbol name>
nm_proc = subprocess.run(
    [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True
)
if nm_proc.returncode != 0:
    # capture_output hides nm's diagnostics; surface them so that a later
    # "symbol is not defined" error can be traced back to the nm failure.
    print(
        f"WARNING: {args.nmtool} exited with code {nm_proc.returncode}:\n"
        f"{nm_proc.stderr}",
        file=sys.stderr,
    )
nm_output = nm_proc.stdout
# Populate symbol map: symbol name -> symbol value (hex string as printed by nm)
symbols = {}
for symline in nm_output.splitlines():
    fields = symline.split(maxsplit=2)
    if len(fields) != 3:
        # Not a "<value> <type> <name>" record (e.g. a blank line); skip it.
        continue
    symval, _, symname = fields
    # With --no-redefine the first value seen for a name wins; otherwise a
    # later duplicate overrides the earlier one.
    if symname in symbols and args.no_redefine:
        continue
    symbols[symname] = symval


def evaluate_symbol(issym, anchor, offsym):
    """
    Resolve one "<is symbol?> <anchor> <offset>" location and return it as a
    space-separated string.

    If offsym does not start with a "#name#" reference, the three fields are
    returned unchanged. Otherwise the referenced symbol is looked up in the
    module-level symbols map:
      - when issym is "0", the offset becomes the symbol value exactly as
        stored in the map;
      - otherwise the offset becomes the hexadecimal difference between the
        symbol value and the value of the anchor symbol.

    Exits with an error message if the referenced symbol, or the anchor when
    it is needed, is missing from the symbols map.
    """
    sym_match = replace_pat.match(offsym)
    if not sym_match:
        # No need to evaluate symbol value, return as is
        return f"{issym} {anchor} {offsym}"
    symname = sym_match.group("symname")
    # Explicit checks instead of assert: asserts are stripped under
    # "python -O", which would turn these errors into a bare KeyError.
    if symname not in symbols:
        sys.exit(f"ERROR: symbol {symname} is not defined in binary")
    # Evaluate to an absolute offset if issym is false
    if issym == "0":
        return f"{issym} {anchor} {symbols[symname]}"
    # Evaluate symbol against its anchor if issym is true
    if anchor not in symbols:
        sys.exit(f"ERROR: symbol {anchor} is not defined in binary")
    anchor_value = int(symbols[anchor], 16)
    symbol_value = int(symbols[symname], 16)
    sym_offset = symbol_value - anchor_value
    return f"{issym} {anchor} {sym_offset:x}"


def replace_symbol(matchobj):
    """
    Expects matchobj to only capture one group which contains the symbol name.

    Returns the value stored for that symbol in the module-level symbols map,
    so the function can be used as the replacement callback of a regex
    substitution. Exits with an error message if the symbol is not in the map.
    """
    symname = matchobj.group("symname")
    # Explicit check instead of assert so that it survives "python -O".
    if symname not in symbols:
        sys.exit(f"ERROR: symbol {symname} is not defined in binary")
    return symbols[symname]


# Write the resolved profile. newline="\n" keeps line endings identical on
# every platform.
with open(args.output, "w", newline="\n") as f:
    if args.no_lbr:
        print("no_lbr", file=f)
    for etype, expr in exprs:
        if etype == "FDATA":
            # The two trailing counts are written back in the order they
            # appeared in the input.
            (
                issym1,
                anchor1,
                offsym1,
                issym2,
                anchor2,
                offsym2,
                mispred,
                branches,
            ) = expr
            print(
                evaluate_symbol(issym1, anchor1, offsym1),
                evaluate_symbol(issym2, anchor2, offsym2),
                mispred,
                branches,
                file=f,
            )
        elif etype == "NOLBR":
            issym, anchor, offsym, count = expr
            print(evaluate_symbol(issym, anchor, offsym), count, file=f)
        elif etype == "PREAGG":
            # Replace all symbols enclosed in ##
            print(expr[0], replace_pat.sub(replace_symbol, expr[1]), file=f)
        else:
            sys.exit("ERROR: unhandled expression type:\n%s" % etype)