xref: /llvm-project/bolt/test/link_fdata.py (revision 97025bd9d5b32f984f07d6ae20a3ce6ddb3fbe2a)
1#!/usr/bin/env python3
2
3"""
This script reads the input file, extracts all lines starting with
"# FDATA: " (or a given prefix instead of "FDATA"), parses the directives,
replaces symbol names ("#name#") with either symbol values or with offsets from
respective anchor symbols, and writes the resulting profile to the output file.
8"""
9
10import argparse
11import subprocess
12import sys
13import re
14
# Command-line interface. The order of the positional arguments (input,
# objfile, output, optional prefix) is part of the interface.
parser = argparse.ArgumentParser()
parser.add_argument("input", help="Input file containing '# <prefix>: ' lines")
parser.add_argument("objfile", help="Object file to extract symbol values from")
parser.add_argument("output", help="Output file to write the resolved profile to")
parser.add_argument("prefix", nargs="?", default="FDATA", help="Custom FDATA prefix")
parser.add_argument("--nmtool", default="nm", help="Path to nm tool")
parser.add_argument(
    "--no-lbr",
    action="store_true",
    help="Emit a leading 'no_lbr' line in the output",
)
parser.add_argument(
    "--no-redefine",
    action="store_true",
    help="Keep the first value of a symbol that nm lists more than once",
)

args = parser.parse_args()
25
# Regexes to extract profile lines from the input and to parse FDATA, no-LBR
# and pre-aggregated profile data.
# NOTE: the prefix is interpolated into the pattern verbatim, so any regex
# metacharacters in a custom prefix are interpreted as regex syntax.
prefix_pat = re.compile(f"^# {args.prefix}: (.*)")

# FDATA records:
# <is symbol?> <closest elf symbol or DSO name> <relative FROM address>
# <is symbol?> <closest elf symbol or DSO name> <relative TO address>
# <number of mispredictions> <number of branches>
# The first (unnamed) group greedily captures both locations as one string.
# The named groups follow the field order documented above; they are consumed
# positionally via groups().
fdata_pat = re.compile(r"([01].*) (?P<mispred>\d+) (?P<exec>\d+)")

# Pre-aggregated profile:
# {B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> <count>
# [<mispred_count>]
preagg_pat = re.compile(r"(?P<type>[BFf]) (?P<offsets_count>.*)")

# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
# The first (unnamed) group captures the whole location as one string.
nolbr_pat = re.compile(r"([01].*) (?P<count>\d+)")

# Replacement symbol: #symname#
replace_pat = re.compile(r"#(?P<symname>[^#]+)#")
47
# Read input and construct the list of profile expressions as (kind, fields)
# tuples:
#   ("FDATA", (is_sym, anchor, offset, is_sym, anchor, offset, count, count))
#   ("NOLBR", (is_sym, anchor, offset, count))
#   ("PREAGG", (type, offsets_and_counts))
# All fields are kept as strings exactly as written in the input. The two
# trailing FDATA counts are passed through in input order.
exprs = []
# Field separator: spaces not preceded by a backslash (negative lookbehind),
# so that escaped spaces can appear inside a field.
field_sep = re.compile(r"(?<!\\) +")
with open(args.input, "r") as f:
    for line in f:
        prefix_match = prefix_pat.match(line)
        if not prefix_match:
            continue
        profile_line = prefix_match.group(1)
        fdata_match = fdata_pat.match(profile_line)
        preagg_match = preagg_pat.match(profile_line)
        nolbr_match = nolbr_pat.match(profile_line)
        # A no-LBR record whose offset consists of decimal digits only
        # (e.g. "1 main 10 5") also matches the FDATA regex, so a regex match
        # alone cannot decide the record kind. Decide by the number of fields
        # separated by non-escaped whitespace instead: 6 location fields for
        # FDATA, 3 for no-LBR.
        fdata_chunks = field_sep.split(fdata_match.group(1)) if fdata_match else []
        nolbr_chunks = field_sep.split(nolbr_match.group(1)) if nolbr_match else []
        if len(fdata_chunks) == 6:
            exprs.append(("FDATA", (*fdata_chunks, *fdata_match.groups()[1:])))
        elif len(nolbr_chunks) == 3:
            exprs.append(("NOLBR", (*nolbr_chunks, nolbr_match.group("count"))))
        elif fdata_match or nolbr_match:
            # Looks like an FDATA/no-LBR record but the field count is wrong.
            # Exit explicitly (not assert) so the check survives `python -O`.
            sys.exit(f"ERROR: wrong format/whitespaces must be escaped:\n{line}")
        elif preagg_match:
            exprs.append(("PREAGG", preagg_match.groups()))
        else:
            sys.exit("ERROR: unexpected input:\n%s" % line)
85
# Read nm output: <symbol value> <symbol type> <symbol name>
nm_output = subprocess.run(
    [args.nmtool, "--defined-only", args.objfile], text=True, capture_output=True
).stdout
# Populate symbol map: symbol name -> symbol value (hex string as printed by
# nm). A later line for the same name overwrites the earlier value unless
# --no-redefine is given, in which case the first value is kept.
symbols = {}
for symline in nm_output.splitlines():
    fields = symline.split(maxsplit=2)
    if len(fields) != 3:
        # Blank line or an entry without a name: there is nothing to map, and
        # unpacking it would raise ValueError.
        continue
    symval, _, symname = fields
    if symname in symbols and args.no_redefine:
        continue
    symbols[symname] = symval
97
98
def evaluate_symbol(issym, anchor, offsym):
    """
    Format one "<is symbol?> <anchor> <offset>" location, resolving the offset
    if it starts with a "#symname#" reference.

    An offset without such a reference is emitted unchanged. Otherwise, when
    issym is "0" the reference is replaced with the symbol value as stored in
    the symbol map; for any other issym it is replaced with the hexadecimal
    difference between the referenced symbol and the anchor symbol.

    Raises AssertionError if the referenced symbol (or, for symbol-relative
    locations, the anchor) is missing from the symbol map.
    """
    reference = replace_pat.match(offsym)
    if reference is None:
        # Nothing to resolve, keep the location as written
        return f"{issym} {anchor} {offsym}"

    target = reference.group("symname")
    assert target in symbols, f"ERROR: symbol {target} is not defined in binary"

    if issym != "0":
        # Symbol-relative location: offset of the target from its anchor
        assert anchor in symbols, f"ERROR: symbol {anchor} is not defined in binary"
        base = int(symbols[anchor], 16)
        delta = int(symbols[target], 16) - base
        return f"{issym} {anchor} {delta:x}"

    # Absolute location: use the symbol value verbatim
    return f"{issym} {anchor} {symbols[target]}"
115
116
def replace_symbol(matchobj):
    """
    Substitution callback: return the symbol-map value for the name captured
    by the "symname" group of matchobj.

    Raises AssertionError if that name is missing from the symbol map.
    """
    name = matchobj["symname"]
    assert name in symbols, f"ERROR: symbol {name} is not defined in binary"
    value = symbols[name]
    return value
124
125
# Write the resolved profile, one record per line. newline="\n" keeps the
# line endings as "\n" regardless of the platform default.
with open(args.output, "w", newline="\n") as f:
    if args.no_lbr:
        print("no_lbr", file=f)
    for etype, expr in exprs:
        if etype == "FDATA":
            # Two locations followed by the two trailing counts, which are
            # written back in input order (per the FDATA record format:
            # mispredictions, then branches).
            issym1, anchor1, offsym1, issym2, anchor2, offsym2, mispred, execnt = expr
            print(
                evaluate_symbol(issym1, anchor1, offsym1),
                evaluate_symbol(issym2, anchor2, offsym2),
                mispred,
                execnt,
                file=f,
            )
        elif etype == "NOLBR":
            issym, anchor, offsym, count = expr
            print(evaluate_symbol(issym, anchor, offsym), count, file=f)
        elif etype == "PREAGG":
            # Replace all symbols enclosed in ##
            print(expr[0], replace_pat.sub(replace_symbol, expr[1]), file=f)
        else:
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive use and is absent when site is not loaded.
            sys.exit("ERROR: unhandled expression type:\n%s" % etype)
147