xref: /netbsd-src/external/apache2/llvm/dist/libcxx/utils/google-benchmark/tools/strip_asm.py (revision 4d6fc14bc9b0c5bf3e30be318c143ee82cadd108)
1*4d6fc14bSjoerg#!/usr/bin/env python
2*4d6fc14bSjoerg
"""
strip_asm.py - Clean up the ASM output for the specified file
"""
6*4d6fc14bSjoerg
7*4d6fc14bSjoergfrom argparse import ArgumentParser
8*4d6fc14bSjoergimport sys
9*4d6fc14bSjoergimport os
10*4d6fc14bSjoergimport re
11*4d6fc14bSjoerg
def find_used_labels(asm):
    """
    find_used_labels - collect the local labels that are used as jump targets.

    Each line of `asm` is checked for an instruction whose mnemonic begins
    with 'j' (lowercase letters only) followed by whitespace and an operand
    starting with '.L'. Only the first such operand on a line is considered,
    because the pattern is anchored at the start of the line.

    Returns a set of label names, each including its '.L' prefix.
    """
    # Raw string so that '\s' and '\.' reach the regex engine untouched
    # instead of being (invalid) Python string escapes.
    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
    found = set()
    for line in asm.splitlines():
        m = label_re.match(line)
        if m:
            found.add('.L%s' % m.group(1))
    return found
20*4d6fc14bSjoerg
21*4d6fc14bSjoerg
def normalize_labels(asm):
    """
    normalize_labels - make every local label start with '.L'.

    Label declarations are lines that begin with 'L<name>:' or '.L<name>:'.
    When none of the declarations found in `asm` carries the leading dot,
    each declared label, and each whitespace-preceded use of it, is
    rewritten from 'L<name>' to '.L<name>'.

    The text is returned unchanged when it contains no label declarations
    or when at least one declaration is already dotted (the input is then
    taken to use the dotted spelling already).
    """
    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    decls = set()
    for line in asm.splitlines():
        m = label_decl.match(line)
        if m:
            decls.add(m.group(0))
    # Decide from *all* declarations rather than an arbitrary set element, so
    # the result does not depend on set iteration order.
    if not decls or any(d.startswith('.') for d in decls):
        return asm
    for ld in sorted(decls):
        # The lookahead also accepts end-of-input so that a reference on a
        # final line without a trailing newline is rewritten too.
        asm = re.sub(r"(^|\s+)" + re.escape(ld) + r"(?=:|\s|$)",
                     r"\1." + ld, asm)
    return asm
37*4d6fc14bSjoerg
38*4d6fc14bSjoerg
def transform_labels(asm):
    """
    transform_labels - normalize label spelling and drop unused labels.

    The text is first passed through normalize_labels(). Every line that
    declares a '.L<name>:' label is then removed unless that label appears
    in the set returned by find_used_labels(). All other lines are kept.

    Returns the resulting text with each kept line terminated by a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    # Raw string: '\.' must reach the regex engine as an escaped dot.
    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
    kept = []
    for line in asm.splitlines():
        m = label_decl.match(line)
        if not m or m.group(0) in used_decls:
            kept.append(line)
    # Join once instead of growing a string line by line.
    return ''.join(line + '\n' for line in kept)
50*4d6fc14bSjoerg
51*4d6fc14bSjoerg
def is_identifier(tk):
    """
    is_identifier - report whether the token `tk` looks like an identifier.

    A token qualifies when it is non-empty, its first character is a letter
    or an underscore, and every remaining character is alphanumeric or an
    underscore.
    """
    if not tk:
        return False
    head, tail = tk[0], tk[1:]
    if head != '_' and not head.isalpha():
        return False
    return all(ch == '_' or ch.isalnum() for ch in tail)
63*4d6fc14bSjoerg
def process_identifiers(l):
    """
    process_identifiers - rewrite the identifiers on a line so that they have
    consistent names across platforms, specifically across ELF and MachO.
    MachO inserts an additional underscore at the beginning of names; this
    function removes it.

    The line is split into runs of [a-zA-Z0-9_] and the text between them.
    Runs accepted by is_identifier() lose one leading underscore when they
    start with '__Z', or when they start with '_' followed by a letter
    other than 'Z'. Everything else is copied through unchanged.
    """
    def strip_extra_underscore(tk):
        if not is_identifier(tk):
            return tk
        if tk.startswith('__Z'):
            return tk[1:]
        if len(tk) > 1 and tk[0] == '_' and tk[1].isalpha() and tk[1] != 'Z':
            return tk[1:]
        return tk

    pieces = re.split(r'([a-zA-Z0-9_]+)', l)
    return ''.join(strip_extra_underscore(tk) for tk in pieces)
82*4d6fc14bSjoerg
83*4d6fc14bSjoerg
def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    The text is first run through transform_labels(). Each line then has
    any '@GOTPCREL' suffix removed and is dropped if it matches one of the
    discard patterns (unless a keep pattern also matches). Surviving lines
    are passed through process_identifiers(). A blank line is emitted before
    every function label except when nothing has been output yet.

    Returns the cleaned text with each line terminated by a newline.
    """
    asm = transform_labels(asm)

    # TODO: Add more things we want to remove
    # All patterns are raw strings so that '\s' and '\.' are regex escapes
    # rather than (invalid) Python string escapes.
    discard_regexes = [
        re.compile(r"\s+\..*$"),  # directive
        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
        re.compile(r"\s*#.*$"),  # comment line
        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
    ]
    # Patterns listed here override a discard match; currently none.
    keep_regexes = [

    ]
    fn_label_def = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")
    out_lines = []
    for l in asm.splitlines():
        # Remove Mach-O attribute
        l = l.replace('@GOTPCREL', '')
        add_line = not any(reg.match(l) is not None for reg in discard_regexes)
        if any(reg.match(l) is not None for reg in keep_regexes):
            add_line = True
        if not add_line:
            continue
        # Separate functions with a blank line, but never lead with one.
        if fn_label_def.match(l) and out_lines:
            out_lines.append('')
        out_lines.append(process_identifiers(l))
    return ''.join(line + '\n' for line in out_lines)
122*4d6fc14bSjoerg
def main():
    """
    Command-line entry point: read an assembly file, strip it, write it out.

    Takes two positional arguments, the input assembly file and the output
    file. Prints an error and exits with status 1 when the input path is
    not an existing file. Unrecognized extra arguments are ignored.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # parse_known_args() is kept so that unrecognized arguments are still
    # tolerated rather than rejected.
    args, _ = parser.parse_known_args()
    # nargs=1 yields one-element lists; avoid shadowing the builtin input().
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print(("ERROR: input file '%s' does not exist") % input_path)
        sys.exit(1)
    with open(input_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, 'w') as f:
        f.write(new_contents)
144*4d6fc14bSjoerg
145*4d6fc14bSjoerg
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
148*4d6fc14bSjoerg
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;
152