#!/usr/bin/env python

"""
strip_asm.py - Cleanup ASM output for the specified file
"""

from argparse import ArgumentParser
import sys
import os
import re

# A jump instruction (jmp, jne, ja, ...) whose operand is a local '.L' label.
_JUMP_TARGET_RE = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")

# A label declaration at the start of a line, with or without the leading
# dot ('.LBB0_1:' or 'LBB0_1:').  The ':' is only looked at, not captured.
_ANY_LABEL_DECL_RE = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

# A label declaration that already carries the leading dot.
_DOTTED_LABEL_DECL_RE = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")

# Runs of identifier characters; the capture group makes re.split() keep them.
_TOKEN_SPLIT_RE = re.compile(r'([a-zA-Z0-9_]+)')

# A non-local label at the start of a line, used to insert a blank separator
# line before it in the output.
_FN_LABEL_DEF_RE = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_.]*:")

# Lines matching any of these (anchored at the start of the line) are dropped.
# TODO: Add more things we want to remove
_DISCARD_REGEXES = [
    re.compile(r"\s+\..*$"),  # directive
    re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
    re.compile(r"\s*#.*$"),  # comment line
    re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
    re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
]

# Lines matching any of these are kept even if a discard pattern matched.
# Currently empty.
_KEEP_REGEXES = [
]


def find_used_labels(asm):
    """
    Return the set of '.L' labels that are the operand of a jump instruction.

    Only lines that start (after optional whitespace) with a 'j...' mnemonic
    followed by a '.L' label are considered.  Each returned name includes the
    leading '.L'.
    """
    found = set()
    for line in asm.splitlines():
        m = _JUMP_TARGET_RE.match(line)
        if m:
            found.add('.L%s' % m.group(1))
    return found


def normalize_labels(asm):
    """
    Rewrite dot-less 'L' labels so that they carry a leading '.'.

    All label declarations at the start of a line are collected.  If there are
    none, or if any of them already starts with '.', the text is returned
    unchanged.  Otherwise every occurrence of each declared label that is
    preceded by whitespace (or the start of the text) and followed by ':',
    whitespace or the end of the text gets a '.' prepended.
    """
    decls = set()
    for line in asm.splitlines():
        m = _ANY_LABEL_DECL_RE.match(line)
        if m:
            decls.add(m.group(0))
    if not decls:
        return asm
    # Decide from the whole set rather than from one arbitrary element: a
    # single dotted declaration means the input already uses '.L' labels, and
    # prefixing in that case would mangle them.
    if any(decl.startswith('.') for decl in decls):
        return asm
    for decl in decls:
        # '$' in the lookahead covers a reference that ends the input without
        # a trailing newline.
        pattern = r"(^|\s+)" + re.escape(decl) + r"(?=:|\s|$)"
        asm = re.sub(pattern, r'\1.' + decl, asm)
    return asm


def transform_labels(asm):
    """
    Normalize labels and drop declarations of '.L' labels no jump refers to.

    Returns the remaining lines, each terminated by a newline.
    """
    asm = normalize_labels(asm)
    used_decls = find_used_labels(asm)
    kept = []
    for line in asm.splitlines():
        m = _DOTTED_LABEL_DECL_RE.match(line)
        if not m or m.group(0) in used_decls:
            kept.append(line + '\n')
    return ''.join(kept)


def is_identifier(tk):
    """
    Return True if tk is non-empty, starts with a letter or '_', and every
    following character is alphanumeric or '_'.
    """
    if not tk:
        return False
    if not (tk[0].isalpha() or tk[0] == '_'):
        return False
    return all(c.isalnum() or c == '_' for c in tk[1:])


def process_identifiers(l):
    """
    process_identifiers - process all identifiers and modify them to have
    consistent names across all platforms; specifically across ELF and MachO.
    For example, MachO inserts an additional underscore at the beginning of
    names. This function removes that.
    """
    pieces = []
    for tk in _TOKEN_SPLIT_RE.split(l):
        if is_identifier(tk):
            if tk.startswith('__Z'):
                # '__Z...' -> '_Z...': drop only the extra underscore.
                tk = tk[1:]
            elif tk.startswith('_') and len(tk) > 1 and \
                    tk[1].isalpha() and tk[1] != 'Z':
                # '_name' -> 'name'; names beginning with '_Z' are left alone.
                tk = tk[1:]
        pieces.append(tk)
    return ''.join(pieces)


def process_asm(asm):
    """
    Strip the ASM of unwanted directives and lines

    Labels are first cleaned up with transform_labels().  Each remaining line
    has '@GOTPCREL' removed, is dropped if it matches a discard pattern (unless
    a keep pattern also matches), and otherwise has its identifiers processed
    by process_identifiers().  A blank line is inserted before every non-local
    label except when nothing has been emitted yet.  Every emitted line ends
    with a newline.
    """
    out = []
    for l in transform_labels(asm).splitlines():
        # Remove Mach-O attribute
        l = l.replace('@GOTPCREL', '')
        add_line = not any(reg.match(l) for reg in _DISCARD_REGEXES)
        if any(reg.match(l) for reg in _KEEP_REGEXES):
            add_line = True
        if not add_line:
            continue
        if _FN_LABEL_DEF_RE.match(l) and out:
            out.append('\n')
        out.append(process_identifiers(l))
        out.append('\n')
    return ''.join(out)


def main():
    """
    Command line entry point: read the input assembly file, strip it with
    process_asm() and write the result to the output file.  Prints an error
    and exits with status 1 if the input file does not exist.
    """
    parser = ArgumentParser(
        description='generate a stripped assembly file')
    parser.add_argument(
        'input', metavar='input', type=str, nargs=1,
        help='An input assembly file')
    parser.add_argument(
        'out', metavar='output', type=str, nargs=1,
        help='The output file')
    # Unrecognized arguments are accepted and ignored.
    args, _ = parser.parse_known_args()
    input_path = args.input[0]
    output_path = args.out[0]
    if not os.path.isfile(input_path):
        print(("ERROR: input file '%s' does not exist") % input_path)
        sys.exit(1)
    with open(input_path, 'r') as f:
        contents = f.read()
    new_contents = process_asm(contents)
    with open(output_path, 'w') as f:
        f.write(new_contents)


if __name__ == '__main__':
    main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;