1*7330f729Sjoerg#!/usr/bin/env python 2*7330f729Sjoerg 3*7330f729Sjoergfrom __future__ import absolute_import, division, print_function 4*7330f729Sjoergimport os 5*7330f729Sjoergimport re 6*7330f729Sjoergimport subprocess 7*7330f729Sjoergimport sys 8*7330f729Sjoergimport tempfile 9*7330f729Sjoerg 10*7330f729Sjoerg### 11*7330f729Sjoerg 12*7330f729Sjoergclass DeltaAlgorithm(object): 13*7330f729Sjoerg def __init__(self): 14*7330f729Sjoerg self.cache = set() 15*7330f729Sjoerg 16*7330f729Sjoerg def test(self, changes): 17*7330f729Sjoerg abstract 18*7330f729Sjoerg 19*7330f729Sjoerg ### 20*7330f729Sjoerg 21*7330f729Sjoerg def getTestResult(self, changes): 22*7330f729Sjoerg # There is no reason to cache successful tests because we will 23*7330f729Sjoerg # always reduce the changeset when we see one. 24*7330f729Sjoerg 25*7330f729Sjoerg changeset = frozenset(changes) 26*7330f729Sjoerg if changeset in self.cache: 27*7330f729Sjoerg return False 28*7330f729Sjoerg elif not self.test(changes): 29*7330f729Sjoerg self.cache.add(changeset) 30*7330f729Sjoerg return False 31*7330f729Sjoerg else: 32*7330f729Sjoerg return True 33*7330f729Sjoerg 34*7330f729Sjoerg def run(self, changes, force=False): 35*7330f729Sjoerg # Make sure the initial test passes, if not then (a) either 36*7330f729Sjoerg # the user doesn't expect monotonicity, and we may end up 37*7330f729Sjoerg # doing O(N^2) tests, or (b) the test is wrong. Avoid the 38*7330f729Sjoerg # O(N^2) case unless user requests it. 39*7330f729Sjoerg if not force: 40*7330f729Sjoerg if not self.getTestResult(changes): 41*7330f729Sjoerg raise ValueError('Initial test passed to delta fails.') 42*7330f729Sjoerg 43*7330f729Sjoerg # Check empty set first to quickly find poor test functions. 44*7330f729Sjoerg if self.getTestResult(set()): 45*7330f729Sjoerg return set() 46*7330f729Sjoerg else: 47*7330f729Sjoerg return self.delta(changes, self.split(changes)) 48*7330f729Sjoerg 49*7330f729Sjoerg def split(self, S): 50*7330f729Sjoerg """split(set) -> [sets] 51*7330f729Sjoerg 52*7330f729Sjoerg Partition a set into one or two pieces. 53*7330f729Sjoerg """ 54*7330f729Sjoerg 55*7330f729Sjoerg # There are many ways to split, we could do a better job with more 56*7330f729Sjoerg # context information (but then the API becomes grosser). 57*7330f729Sjoerg L = list(S) 58*7330f729Sjoerg mid = len(L)//2 59*7330f729Sjoerg if mid==0: 60*7330f729Sjoerg return L, 61*7330f729Sjoerg else: 62*7330f729Sjoerg return L[:mid],L[mid:] 63*7330f729Sjoerg 64*7330f729Sjoerg def delta(self, c, sets): 65*7330f729Sjoerg # assert(reduce(set.union, sets, set()) == c) 66*7330f729Sjoerg 67*7330f729Sjoerg # If there is nothing left we can remove, we are done. 68*7330f729Sjoerg if len(sets) <= 1: 69*7330f729Sjoerg return c 70*7330f729Sjoerg 71*7330f729Sjoerg # Look for a passing subset. 72*7330f729Sjoerg res = self.search(c, sets) 73*7330f729Sjoerg if res is not None: 74*7330f729Sjoerg return res 75*7330f729Sjoerg 76*7330f729Sjoerg # Otherwise, partition sets if possible; if not we are done. 77*7330f729Sjoerg refined = sum(map(list, map(self.split, sets)), []) 78*7330f729Sjoerg if len(refined) == len(sets): 79*7330f729Sjoerg return c 80*7330f729Sjoerg 81*7330f729Sjoerg return self.delta(c, refined) 82*7330f729Sjoerg 83*7330f729Sjoerg def search(self, c, sets): 84*7330f729Sjoerg for i,S in enumerate(sets): 85*7330f729Sjoerg # If test passes on this subset alone, recurse. 86*7330f729Sjoerg if self.getTestResult(S): 87*7330f729Sjoerg return self.delta(S, self.split(S)) 88*7330f729Sjoerg 89*7330f729Sjoerg # Otherwise if we have more than two sets, see if test 90*7330f729Sjoerg # pases without this subset. 91*7330f729Sjoerg if len(sets) > 2: 92*7330f729Sjoerg complement = sum(sets[:i] + sets[i+1:],[]) 93*7330f729Sjoerg if self.getTestResult(complement): 94*7330f729Sjoerg return self.delta(complement, sets[:i] + sets[i+1:]) 95*7330f729Sjoerg 96*7330f729Sjoerg### 97*7330f729Sjoerg 98*7330f729Sjoergclass Token(object): 99*7330f729Sjoerg def __init__(self, type, data, flags, file, line, column): 100*7330f729Sjoerg self.type = type 101*7330f729Sjoerg self.data = data 102*7330f729Sjoerg self.flags = flags 103*7330f729Sjoerg self.file = file 104*7330f729Sjoerg self.line = line 105*7330f729Sjoerg self.column = column 106*7330f729Sjoerg 107*7330f729SjoergkTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", 108*7330f729Sjoerg re.DOTALL | re.MULTILINE) 109*7330f729Sjoerg 110*7330f729Sjoergdef getTokens(path): 111*7330f729Sjoerg p = subprocess.Popen(['clang','-dump-raw-tokens',path], 112*7330f729Sjoerg stdin=subprocess.PIPE, 113*7330f729Sjoerg stdout=subprocess.PIPE, 114*7330f729Sjoerg stderr=subprocess.PIPE) 115*7330f729Sjoerg out,err = p.communicate() 116*7330f729Sjoerg 117*7330f729Sjoerg tokens = [] 118*7330f729Sjoerg collect = None 119*7330f729Sjoerg for ln in err.split('\n'): 120*7330f729Sjoerg # Silly programmers refuse to print in simple machine readable 121*7330f729Sjoerg # formats. Whatever. 122*7330f729Sjoerg if collect is None: 123*7330f729Sjoerg collect = ln 124*7330f729Sjoerg else: 125*7330f729Sjoerg collect = collect + '\n' + ln 126*7330f729Sjoerg if 'Loc=<' in ln and ln.endswith('>'): 127*7330f729Sjoerg ln,collect = collect,None 128*7330f729Sjoerg tokens.append(Token(*kTokenRE.match(ln).groups())) 129*7330f729Sjoerg 130*7330f729Sjoerg return tokens 131*7330f729Sjoerg 132*7330f729Sjoerg### 133*7330f729Sjoerg 134*7330f729Sjoergclass TMBDDelta(DeltaAlgorithm): 135*7330f729Sjoerg def __init__(self, testProgram, tokenLists, log): 136*7330f729Sjoerg def patchName(name, suffix): 137*7330f729Sjoerg base,ext = os.path.splitext(name) 138*7330f729Sjoerg return base + '.' + suffix + ext 139*7330f729Sjoerg super(TMBDDelta, self).__init__() 140*7330f729Sjoerg self.testProgram = testProgram 141*7330f729Sjoerg self.tokenLists = tokenLists 142*7330f729Sjoerg self.tempFiles = [patchName(f,'tmp') 143*7330f729Sjoerg for f,_ in self.tokenLists] 144*7330f729Sjoerg self.targetFiles = [patchName(f,'ok') 145*7330f729Sjoerg for f,_ in self.tokenLists] 146*7330f729Sjoerg self.log = log 147*7330f729Sjoerg self.numTests = 0 148*7330f729Sjoerg 149*7330f729Sjoerg def writeFiles(self, changes, fileNames): 150*7330f729Sjoerg assert len(fileNames) == len(self.tokenLists) 151*7330f729Sjoerg byFile = [[] for i in self.tokenLists] 152*7330f729Sjoerg for i,j in changes: 153*7330f729Sjoerg byFile[i].append(j) 154*7330f729Sjoerg 155*7330f729Sjoerg for i,(file,tokens) in enumerate(self.tokenLists): 156*7330f729Sjoerg f = open(fileNames[i],'w') 157*7330f729Sjoerg for j in byFile[i]: 158*7330f729Sjoerg f.write(tokens[j]) 159*7330f729Sjoerg f.close() 160*7330f729Sjoerg 161*7330f729Sjoerg return byFile 162*7330f729Sjoerg 163*7330f729Sjoerg def test(self, changes): 164*7330f729Sjoerg self.numTests += 1 165*7330f729Sjoerg 166*7330f729Sjoerg byFile = self.writeFiles(changes, self.tempFiles) 167*7330f729Sjoerg 168*7330f729Sjoerg if self.log: 169*7330f729Sjoerg print('TEST - ', end=' ', file=sys.stderr) 170*7330f729Sjoerg if self.log > 1: 171*7330f729Sjoerg for i,(file,_) in enumerate(self.tokenLists): 172*7330f729Sjoerg indices = byFile[i] 173*7330f729Sjoerg if i: 174*7330f729Sjoerg sys.stderr.write('\n ') 175*7330f729Sjoerg sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i]))) 176*7330f729Sjoerg prev = None 177*7330f729Sjoerg for j in byFile[i]: 178*7330f729Sjoerg if prev is None or j != prev + 1: 179*7330f729Sjoerg if prev: 180*7330f729Sjoerg sys.stderr.write('%d][' % prev) 181*7330f729Sjoerg sys.stderr.write(str(j)) 182*7330f729Sjoerg sys.stderr.write(':') 183*7330f729Sjoerg prev = j 184*7330f729Sjoerg if byFile[i]: 185*7330f729Sjoerg sys.stderr.write(str(byFile[i][-1])) 186*7330f729Sjoerg sys.stderr.write('] ') 187*7330f729Sjoerg else: 188*7330f729Sjoerg print(', '.join(['%s:%d tokens' % (file, len(byFile[i])) 189*7330f729Sjoerg for i,(file,_) in enumerate(self.tokenLists)]), end=' ', file=sys.stderr) 190*7330f729Sjoerg 191*7330f729Sjoerg p = subprocess.Popen([self.testProgram] + self.tempFiles) 192*7330f729Sjoerg res = p.wait() == 0 193*7330f729Sjoerg 194*7330f729Sjoerg if res: 195*7330f729Sjoerg self.writeFiles(changes, self.targetFiles) 196*7330f729Sjoerg 197*7330f729Sjoerg if self.log: 198*7330f729Sjoerg print('=> %s' % res, file=sys.stderr) 199*7330f729Sjoerg else: 200*7330f729Sjoerg if res: 201*7330f729Sjoerg print('\nSUCCESS (%d tokens)' % len(changes)) 202*7330f729Sjoerg else: 203*7330f729Sjoerg sys.stderr.write('.') 204*7330f729Sjoerg 205*7330f729Sjoerg return res 206*7330f729Sjoerg 207*7330f729Sjoerg def run(self): 208*7330f729Sjoerg res = super(TMBDDelta, self).run([(i,j) 209*7330f729Sjoerg for i,(file,tokens) in enumerate(self.tokenLists) 210*7330f729Sjoerg for j in range(len(tokens))]) 211*7330f729Sjoerg self.writeFiles(res, self.targetFiles) 212*7330f729Sjoerg if not self.log: 213*7330f729Sjoerg print(file=sys.stderr) 214*7330f729Sjoerg return res 215*7330f729Sjoerg 216*7330f729Sjoergdef tokenBasedMultiDelta(program, files, log): 217*7330f729Sjoerg # Read in the lists of tokens. 218*7330f729Sjoerg tokenLists = [(file, [t.data for t in getTokens(file)]) 219*7330f729Sjoerg for file in files] 220*7330f729Sjoerg 221*7330f729Sjoerg numTokens = sum([len(tokens) for _,tokens in tokenLists]) 222*7330f729Sjoerg print("Delta on %s with %d tokens." % (', '.join(files), numTokens)) 223*7330f729Sjoerg 224*7330f729Sjoerg tbmd = TMBDDelta(program, tokenLists, log) 225*7330f729Sjoerg 226*7330f729Sjoerg res = tbmd.run() 227*7330f729Sjoerg 228*7330f729Sjoerg print("Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles), 229*7330f729Sjoerg len(res), 230*7330f729Sjoerg tbmd.numTests)) 231*7330f729Sjoerg 232*7330f729Sjoergdef main(): 233*7330f729Sjoerg from optparse import OptionParser, OptionGroup 234*7330f729Sjoerg parser = OptionParser("%prog <test program> {files+}") 235*7330f729Sjoerg parser.add_option("", "--debug", dest="debugLevel", 236*7330f729Sjoerg help="set debug level [default %default]", 237*7330f729Sjoerg action="store", type=int, default=0) 238*7330f729Sjoerg (opts, args) = parser.parse_args() 239*7330f729Sjoerg 240*7330f729Sjoerg if len(args) <= 1: 241*7330f729Sjoerg parser.error('Invalid number of arguments.') 242*7330f729Sjoerg 243*7330f729Sjoerg program,files = args[0],args[1:] 244*7330f729Sjoerg 245*7330f729Sjoerg md = tokenBasedMultiDelta(program, files, log=opts.debugLevel) 246*7330f729Sjoerg 247*7330f729Sjoergif __name__ == '__main__': 248*7330f729Sjoerg try: 249*7330f729Sjoerg main() 250*7330f729Sjoerg except KeyboardInterrupt: 251*7330f729Sjoerg print('Interrupted.', file=sys.stderr) 252*7330f729Sjoerg os._exit(1) # Avoid freeing our giant cache. 253