1*e5dd7070Spatrick#!/usr/bin/env python 2*e5dd7070Spatrick 3*e5dd7070Spatrickfrom __future__ import absolute_import, division, print_function 4*e5dd7070Spatrickimport os 5*e5dd7070Spatrickimport re 6*e5dd7070Spatrickimport subprocess 7*e5dd7070Spatrickimport sys 8*e5dd7070Spatrickimport tempfile 9*e5dd7070Spatrick 10*e5dd7070Spatrick### 11*e5dd7070Spatrick 12*e5dd7070Spatrickclass DeltaAlgorithm(object): 13*e5dd7070Spatrick def __init__(self): 14*e5dd7070Spatrick self.cache = set() 15*e5dd7070Spatrick 16*e5dd7070Spatrick def test(self, changes): 17*e5dd7070Spatrick abstract 18*e5dd7070Spatrick 19*e5dd7070Spatrick ### 20*e5dd7070Spatrick 21*e5dd7070Spatrick def getTestResult(self, changes): 22*e5dd7070Spatrick # There is no reason to cache successful tests because we will 23*e5dd7070Spatrick # always reduce the changeset when we see one. 24*e5dd7070Spatrick 25*e5dd7070Spatrick changeset = frozenset(changes) 26*e5dd7070Spatrick if changeset in self.cache: 27*e5dd7070Spatrick return False 28*e5dd7070Spatrick elif not self.test(changes): 29*e5dd7070Spatrick self.cache.add(changeset) 30*e5dd7070Spatrick return False 31*e5dd7070Spatrick else: 32*e5dd7070Spatrick return True 33*e5dd7070Spatrick 34*e5dd7070Spatrick def run(self, changes, force=False): 35*e5dd7070Spatrick # Make sure the initial test passes, if not then (a) either 36*e5dd7070Spatrick # the user doesn't expect monotonicity, and we may end up 37*e5dd7070Spatrick # doing O(N^2) tests, or (b) the test is wrong. Avoid the 38*e5dd7070Spatrick # O(N^2) case unless user requests it. 39*e5dd7070Spatrick if not force: 40*e5dd7070Spatrick if not self.getTestResult(changes): 41*e5dd7070Spatrick raise ValueError('Initial test passed to delta fails.') 42*e5dd7070Spatrick 43*e5dd7070Spatrick # Check empty set first to quickly find poor test functions. 44*e5dd7070Spatrick if self.getTestResult(set()): 45*e5dd7070Spatrick return set() 46*e5dd7070Spatrick else: 47*e5dd7070Spatrick return self.delta(changes, self.split(changes)) 48*e5dd7070Spatrick 49*e5dd7070Spatrick def split(self, S): 50*e5dd7070Spatrick """split(set) -> [sets] 51*e5dd7070Spatrick 52*e5dd7070Spatrick Partition a set into one or two pieces. 53*e5dd7070Spatrick """ 54*e5dd7070Spatrick 55*e5dd7070Spatrick # There are many ways to split, we could do a better job with more 56*e5dd7070Spatrick # context information (but then the API becomes grosser). 57*e5dd7070Spatrick L = list(S) 58*e5dd7070Spatrick mid = len(L)//2 59*e5dd7070Spatrick if mid==0: 60*e5dd7070Spatrick return L, 61*e5dd7070Spatrick else: 62*e5dd7070Spatrick return L[:mid],L[mid:] 63*e5dd7070Spatrick 64*e5dd7070Spatrick def delta(self, c, sets): 65*e5dd7070Spatrick # assert(reduce(set.union, sets, set()) == c) 66*e5dd7070Spatrick 67*e5dd7070Spatrick # If there is nothing left we can remove, we are done. 68*e5dd7070Spatrick if len(sets) <= 1: 69*e5dd7070Spatrick return c 70*e5dd7070Spatrick 71*e5dd7070Spatrick # Look for a passing subset. 72*e5dd7070Spatrick res = self.search(c, sets) 73*e5dd7070Spatrick if res is not None: 74*e5dd7070Spatrick return res 75*e5dd7070Spatrick 76*e5dd7070Spatrick # Otherwise, partition sets if possible; if not we are done. 77*e5dd7070Spatrick refined = sum(map(list, map(self.split, sets)), []) 78*e5dd7070Spatrick if len(refined) == len(sets): 79*e5dd7070Spatrick return c 80*e5dd7070Spatrick 81*e5dd7070Spatrick return self.delta(c, refined) 82*e5dd7070Spatrick 83*e5dd7070Spatrick def search(self, c, sets): 84*e5dd7070Spatrick for i,S in enumerate(sets): 85*e5dd7070Spatrick # If test passes on this subset alone, recurse. 86*e5dd7070Spatrick if self.getTestResult(S): 87*e5dd7070Spatrick return self.delta(S, self.split(S)) 88*e5dd7070Spatrick 89*e5dd7070Spatrick # Otherwise if we have more than two sets, see if test 90*e5dd7070Spatrick # pases without this subset. 91*e5dd7070Spatrick if len(sets) > 2: 92*e5dd7070Spatrick complement = sum(sets[:i] + sets[i+1:],[]) 93*e5dd7070Spatrick if self.getTestResult(complement): 94*e5dd7070Spatrick return self.delta(complement, sets[:i] + sets[i+1:]) 95*e5dd7070Spatrick 96*e5dd7070Spatrick### 97*e5dd7070Spatrick 98*e5dd7070Spatrickclass Token(object): 99*e5dd7070Spatrick def __init__(self, type, data, flags, file, line, column): 100*e5dd7070Spatrick self.type = type 101*e5dd7070Spatrick self.data = data 102*e5dd7070Spatrick self.flags = flags 103*e5dd7070Spatrick self.file = file 104*e5dd7070Spatrick self.line = line 105*e5dd7070Spatrick self.column = column 106*e5dd7070Spatrick 107*e5dd7070SpatrickkTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", 108*e5dd7070Spatrick re.DOTALL | re.MULTILINE) 109*e5dd7070Spatrick 110*e5dd7070Spatrickdef getTokens(path): 111*e5dd7070Spatrick p = subprocess.Popen(['clang','-dump-raw-tokens',path], 112*e5dd7070Spatrick stdin=subprocess.PIPE, 113*e5dd7070Spatrick stdout=subprocess.PIPE, 114*e5dd7070Spatrick stderr=subprocess.PIPE) 115*e5dd7070Spatrick out,err = p.communicate() 116*e5dd7070Spatrick 117*e5dd7070Spatrick tokens = [] 118*e5dd7070Spatrick collect = None 119*e5dd7070Spatrick for ln in err.split('\n'): 120*e5dd7070Spatrick # Silly programmers refuse to print in simple machine readable 121*e5dd7070Spatrick # formats. Whatever. 122*e5dd7070Spatrick if collect is None: 123*e5dd7070Spatrick collect = ln 124*e5dd7070Spatrick else: 125*e5dd7070Spatrick collect = collect + '\n' + ln 126*e5dd7070Spatrick if 'Loc=<' in ln and ln.endswith('>'): 127*e5dd7070Spatrick ln,collect = collect,None 128*e5dd7070Spatrick tokens.append(Token(*kTokenRE.match(ln).groups())) 129*e5dd7070Spatrick 130*e5dd7070Spatrick return tokens 131*e5dd7070Spatrick 132*e5dd7070Spatrick### 133*e5dd7070Spatrick 134*e5dd7070Spatrickclass TMBDDelta(DeltaAlgorithm): 135*e5dd7070Spatrick def __init__(self, testProgram, tokenLists, log): 136*e5dd7070Spatrick def patchName(name, suffix): 137*e5dd7070Spatrick base,ext = os.path.splitext(name) 138*e5dd7070Spatrick return base + '.' + suffix + ext 139*e5dd7070Spatrick super(TMBDDelta, self).__init__() 140*e5dd7070Spatrick self.testProgram = testProgram 141*e5dd7070Spatrick self.tokenLists = tokenLists 142*e5dd7070Spatrick self.tempFiles = [patchName(f,'tmp') 143*e5dd7070Spatrick for f,_ in self.tokenLists] 144*e5dd7070Spatrick self.targetFiles = [patchName(f,'ok') 145*e5dd7070Spatrick for f,_ in self.tokenLists] 146*e5dd7070Spatrick self.log = log 147*e5dd7070Spatrick self.numTests = 0 148*e5dd7070Spatrick 149*e5dd7070Spatrick def writeFiles(self, changes, fileNames): 150*e5dd7070Spatrick assert len(fileNames) == len(self.tokenLists) 151*e5dd7070Spatrick byFile = [[] for i in self.tokenLists] 152*e5dd7070Spatrick for i,j in changes: 153*e5dd7070Spatrick byFile[i].append(j) 154*e5dd7070Spatrick 155*e5dd7070Spatrick for i,(file,tokens) in enumerate(self.tokenLists): 156*e5dd7070Spatrick f = open(fileNames[i],'w') 157*e5dd7070Spatrick for j in byFile[i]: 158*e5dd7070Spatrick f.write(tokens[j]) 159*e5dd7070Spatrick f.close() 160*e5dd7070Spatrick 161*e5dd7070Spatrick return byFile 162*e5dd7070Spatrick 163*e5dd7070Spatrick def test(self, changes): 164*e5dd7070Spatrick self.numTests += 1 165*e5dd7070Spatrick 166*e5dd7070Spatrick byFile = self.writeFiles(changes, self.tempFiles) 167*e5dd7070Spatrick 168*e5dd7070Spatrick if self.log: 169*e5dd7070Spatrick print('TEST - ', end=' ', file=sys.stderr) 170*e5dd7070Spatrick if self.log > 1: 171*e5dd7070Spatrick for i,(file,_) in enumerate(self.tokenLists): 172*e5dd7070Spatrick indices = byFile[i] 173*e5dd7070Spatrick if i: 174*e5dd7070Spatrick sys.stderr.write('\n ') 175*e5dd7070Spatrick sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i]))) 176*e5dd7070Spatrick prev = None 177*e5dd7070Spatrick for j in byFile[i]: 178*e5dd7070Spatrick if prev is None or j != prev + 1: 179*e5dd7070Spatrick if prev: 180*e5dd7070Spatrick sys.stderr.write('%d][' % prev) 181*e5dd7070Spatrick sys.stderr.write(str(j)) 182*e5dd7070Spatrick sys.stderr.write(':') 183*e5dd7070Spatrick prev = j 184*e5dd7070Spatrick if byFile[i]: 185*e5dd7070Spatrick sys.stderr.write(str(byFile[i][-1])) 186*e5dd7070Spatrick sys.stderr.write('] ') 187*e5dd7070Spatrick else: 188*e5dd7070Spatrick print(', '.join(['%s:%d tokens' % (file, len(byFile[i])) 189*e5dd7070Spatrick for i,(file,_) in enumerate(self.tokenLists)]), end=' ', file=sys.stderr) 190*e5dd7070Spatrick 191*e5dd7070Spatrick p = subprocess.Popen([self.testProgram] + self.tempFiles) 192*e5dd7070Spatrick res = p.wait() == 0 193*e5dd7070Spatrick 194*e5dd7070Spatrick if res: 195*e5dd7070Spatrick self.writeFiles(changes, self.targetFiles) 196*e5dd7070Spatrick 197*e5dd7070Spatrick if self.log: 198*e5dd7070Spatrick print('=> %s' % res, file=sys.stderr) 199*e5dd7070Spatrick else: 200*e5dd7070Spatrick if res: 201*e5dd7070Spatrick print('\nSUCCESS (%d tokens)' % len(changes)) 202*e5dd7070Spatrick else: 203*e5dd7070Spatrick sys.stderr.write('.') 204*e5dd7070Spatrick 205*e5dd7070Spatrick return res 206*e5dd7070Spatrick 207*e5dd7070Spatrick def run(self): 208*e5dd7070Spatrick res = super(TMBDDelta, self).run([(i,j) 209*e5dd7070Spatrick for i,(file,tokens) in enumerate(self.tokenLists) 210*e5dd7070Spatrick for j in range(len(tokens))]) 211*e5dd7070Spatrick self.writeFiles(res, self.targetFiles) 212*e5dd7070Spatrick if not self.log: 213*e5dd7070Spatrick print(file=sys.stderr) 214*e5dd7070Spatrick return res 215*e5dd7070Spatrick 216*e5dd7070Spatrickdef tokenBasedMultiDelta(program, files, log): 217*e5dd7070Spatrick # Read in the lists of tokens. 218*e5dd7070Spatrick tokenLists = [(file, [t.data for t in getTokens(file)]) 219*e5dd7070Spatrick for file in files] 220*e5dd7070Spatrick 221*e5dd7070Spatrick numTokens = sum([len(tokens) for _,tokens in tokenLists]) 222*e5dd7070Spatrick print("Delta on %s with %d tokens." % (', '.join(files), numTokens)) 223*e5dd7070Spatrick 224*e5dd7070Spatrick tbmd = TMBDDelta(program, tokenLists, log) 225*e5dd7070Spatrick 226*e5dd7070Spatrick res = tbmd.run() 227*e5dd7070Spatrick 228*e5dd7070Spatrick print("Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles), 229*e5dd7070Spatrick len(res), 230*e5dd7070Spatrick tbmd.numTests)) 231*e5dd7070Spatrick 232*e5dd7070Spatrickdef main(): 233*e5dd7070Spatrick from optparse import OptionParser, OptionGroup 234*e5dd7070Spatrick parser = OptionParser("%prog <test program> {files+}") 235*e5dd7070Spatrick parser.add_option("", "--debug", dest="debugLevel", 236*e5dd7070Spatrick help="set debug level [default %default]", 237*e5dd7070Spatrick action="store", type=int, default=0) 238*e5dd7070Spatrick (opts, args) = parser.parse_args() 239*e5dd7070Spatrick 240*e5dd7070Spatrick if len(args) <= 1: 241*e5dd7070Spatrick parser.error('Invalid number of arguments.') 242*e5dd7070Spatrick 243*e5dd7070Spatrick program,files = args[0],args[1:] 244*e5dd7070Spatrick 245*e5dd7070Spatrick md = tokenBasedMultiDelta(program, files, log=opts.debugLevel) 246*e5dd7070Spatrick 247*e5dd7070Spatrickif __name__ == '__main__': 248*e5dd7070Spatrick try: 249*e5dd7070Spatrick main() 250*e5dd7070Spatrick except KeyboardInterrupt: 251*e5dd7070Spatrick print('Interrupted.', file=sys.stderr) 252*e5dd7070Spatrick os._exit(1) # Avoid freeing our giant cache. 253