#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2018 Free Software Foundation
# Contributed by Bernhard Reutner-Fischer <aldot@gcc.gnu.org>
# Inspired by bloat-o-meter from busybox.

# This software may be used and distributed according to the terms and
# conditions of the GNU General Public License as published by the Free
# Software Foundation.

# For a set of object-files, determine symbols that are
#  - public but should be static

# Examples:
# unused_functions.py ./gcc/fortran
# unused_functions.py gcc/c gcc/c-family/ gcc/*-c.o | grep -v "'gt_"
# unused_functions.py gcc/cp gcc/c-family/ gcc/*-c.o | grep -v "'gt_"

import os
import shutil
import sys
from tempfile import mkdtemp
from subprocess import Popen, PIPE


def usage():
    """Print the command-line synopsis to stderr and exit with status 1."""
    sys.stderr.write("usage: %s [-v] [dirs | files] [-- <readelf options>]\n"
                     % sys.argv[0])
    sys.stderr.write("\t-v\tVerbose output\n")
    sys.exit(1)


# Shared script state.  parse_args() fills in odir, sym_args and verbose;
# tmpd is created on demand the first time an archive has to be extracted.
#   odir     - set of files/directories named on the command line
#   sym_args - extra readelf options (everything after '--'), space-joined
#   tmpd     - scratch directory for extracted archives, or None
#   verbose  - True when -v was given; enables dbg() output
(odir, sym_args, tmpd, verbose) = (set(), "", None, False)

(oprog, nprog) = ({}, {})


def parse_args(argv):
    """Parse ARGV (including the program name at index 0).

    Input paths are added to the global ``odir``; ``-v`` sets the global
    ``verbose``; everything after ``--`` is stored space-joined in the
    global ``sym_args``.  A path that does not exist prints an error and
    terminates the script through usage().
    """
    global sym_args, verbose
    for i, f in enumerate(argv[1:], 1):
        if f == '--':  # sym_args
            sym_args = ' '.join(argv[i + 1:])
            break
        if f == '-v':
            verbose = True
            continue
        if not os.path.exists(f):
            sys.stderr.write("Error: No such file or directory '%s'\n" % f)
            usage()
        odir.add(f)


def dbg(args):
    """Print ARGS, but only when verbose output was requested."""
    if not verbose:
        return
    print(args)


def get_symbols(file):
    """Return the interesting symbols of object file FILE.

    Runs ``readelf -W -s`` (plus the user supplied ``sym_args``) piped
    through ``c++filt`` and parses the symbol table.  The result maps each
    symbol name to a dict of flags; the keys present are a subset of
    'def', 'use', 'global' and 'local', each with the value True.

    Skipped entries: SECTION and FILE symbols, symbols whose visibility is
    not DEFAULT, and symbols that are defined but not GLOBAL.
    """
    syms = {}
    # Build an argv list instead of splitting a formatted string so that a
    # FILE containing whitespace is still passed as one argument.
    cmd = ["readelf", "-W", "-s"] + sym_args.split() + [file]
    p0 = Popen(cmd, stdout=PIPE)
    p1 = Popen(["c++filt"], stdin=p0.stdout, stdout=PIPE,
               universal_newlines=True)
    # Drop our copy of the pipe's read end so readelf is not kept alive by
    # the parent should c++filt exit early.
    p0.stdout.close()
    lines = p1.communicate()[0]
    # Reap readelf so that its exit status is available for the warning.
    p0.wait()
    for l in lines.split('\n'):
        l = l.strip()
        # Symbol table rows start with the symbol number.
        if not l or not l[0].isdigit():
            continue
        larr = l.split()
        # NOTE(review): rows whose name contains whitespace (for instance a
        # demangled signature with several parameters) have more than 8
        # fields and are skipped here.
        if len(larr) != 8:
            continue
        num, value, size, typ, bind, vis, ndx, name = larr
        if typ == 'SECTION' or typ == 'FILE':
            continue
        # I don't think we have many aliases in gcc, re-instate the addr
        # lut otherwise.
        if vis != 'DEFAULT':
            continue
        defined = ndx != 'UND'
        globl = bind == 'GLOBAL'
        # c++ RID_FUNCTION_NAME dance. FORNOW: Handled as local use
        # Is that correct?
        if name.endswith('::__FUNCTION__') and typ == 'OBJECT':
            name = name[:-len('::__FUNCTION__')]
            defined = False
        if defined and not globl:
            continue
        flags = syms.setdefault(name, {})
        flags['def' if defined else 'use'] = True
        flags['global' if globl else 'local'] = True
        # Note: we could filter out e.g. debug_* symbols by looking for
        # value in the debug_macro sections.
    # Report a failure of either stage of the pipeline.
    if p0.returncode != 0 or p1.returncode != 0:
        print("Warning: Reading file '%s' exited with %r|%r"
              % (file, p0.returncode, p1.returncode))
    return syms


def walker(paths):
    """Collect the symbols of every object file reachable from PATHS.

    Each entry of PATHS may be a directory (searched recursively), a '.a'
    archive or a '.o' object file; anything else is ignored.  Archives are
    extracted below the global scratch directory ``tmpd`` (which the
    caller is expected to walk afterwards); if extraction fails the
    archive is assumed to be thin and its listed members are read in
    place.

    Returns a dict mapping the normalized object file path to the result
    of get_symbols() for that file.
    """
    def ar_x(archive):
        """Extract ARCHIVE below tmpd.

        Returns (archive, directory), where directory is None if
        ``ar x`` failed.
        """
        global tmpd
        dbg("Archive %s" % archive)
        # Create the scratch directory lazily: archives may also turn up
        # while walking a directory, not only on the command line.
        if tmpd is None:
            tmpd = mkdtemp(prefix='unused_fun')
        src = os.path.abspath(archive)
        d = os.path.normpath(tmpd + os.path.sep + os.path.splitdrive(src)[1])
        if os.path.isdir(d):
            # This archive was already handled earlier in this run.
            return (archive, d)
        try:
            os.makedirs(d)
            # Run ar inside the target directory instead of chdir()ing the
            # whole script there and back.
            p0 = Popen(["ar", "x", src], cwd=d,
                       stderr=PIPE, universal_newlines=True)
            p0.communicate()
        except Exception:
            dbg("ar x: Error: %s: %s" % (archive, sys.exc_info()[0]))
            raise
        if p0.returncode != 0:
            d = None  # assume thin archive
        if d:
            dbg("Extracted to %s" % (d))
        return (archive, d)

    def ar_t(archive):
        """Return the member names that ``ar t`` lists for ARCHIVE."""
        dbg("Thin archive, using existing files:")
        try:
            p0 = Popen(["ar", "t", "%s" % archive], stdout=PIPE,
                       universal_newlines=True)
            ret = p0.communicate()[0]
        except Exception:
            dbg("ar t: Error: %s: %s" % (archive, sys.exc_info()[0]))
            raise
        return [member for member in ret.split('\n') if member]

    prog = {}
    for path in paths:
        if os.path.isdir(path):
            # os.walk() already descends into sub-directories, so only the
            # files of each level are handed to walker().  followlinks
            # keeps symlinked directories included in the search.
            for r, dirs, files in os.walk(path, followlinks=True):
                if files:
                    dbg("Files %s" % ", ".join(files))
                if dirs:
                    dbg("Dirs %s" % ", ".join(dirs))
                prog.update(walker([os.path.join(r, f) for f in files]))
            continue
        if path.endswith('.a'):
            if ar_x(path)[1] is None:  # extraction failed
                prog.update(walker(ar_t(path)))
            continue
        if not path.endswith('.o'):
            continue
        dbg("Reading symbols from %s" % (path))
        prog[os.path.normpath(path)] = get_symbols(path)
    return prog


def resolve(prog):
    """Find the symbols that are referenced across object files.

    PROG maps a file name to its get_symbols() dict.  A symbol counts as
    externally referenced when one file has it flagged 'def' and a
    different file has it flagged 'use'.

    Every such symbol is removed from all per-file dicts in PROG (PROG is
    modified in place), and the set of those symbol names is returned.
    """
    # Index the files defining and using each symbol in a single pass
    # rather than comparing every pair of files.
    defined_in = {}
    used_in = {}
    for f, syms in prog.items():
        for s, flags in syms.items():
            if flags.get('def'):
                defined_in.setdefault(s, set()).add(f)
            if flags.get('use'):
                used_in.setdefault(s, set()).add(f)

    use = set()
    for s, definers in defined_in.items():
        users = used_in.get(s)
        # With both sets non-empty, two distinct files overall guarantee a
        # (definer, user) pair of different files.
        if users and len(definers | users) > 1:
            use.add(s)

    for syms in prog.values():
        for s in use:
            syms.pop(s, None)
    return use


def unreferenced(prog):
    """Yield (file, symbol) for each symbol in PROG flagged 'def' but not
    'use'."""
    for f, syms in prog.items():
        for s, flags in syms.items():
            if flags.get('def') and not flags.get('use'):
                yield (f, s)


def main(argv=None):
    """Script entry point; ARGV defaults to sys.argv."""
    global oprog
    parse_args(sys.argv if argv is None else argv)
    try:
        oprog = walker(odir)
        if tmpd is not None:
            oprog.update(walker([tmpd]))
        resolve(oprog)
    finally:
        # Only remove the scratch directory if one was actually created.
        if tmpd is not None:
            shutil.rmtree(tmpd, ignore_errors=True)

    for (i, s) in unreferenced(oprog):
        print("%s: Symbol '%s' declared extern but never referenced externally"
              % (i, s))


if __name__ == "__main__":
    main()