1#!/usr/bin/env python 2 3from __future__ import absolute_import, division, print_function 4import os 5import re 6import subprocess 7import sys 8import tempfile 9 10### 11 12 13class DeltaAlgorithm(object): 14 def __init__(self): 15 self.cache = set() 16 17 def test(self, changes): 18 abstract 19 20 ### 21 22 def getTestResult(self, changes): 23 # There is no reason to cache successful tests because we will 24 # always reduce the changeset when we see one. 25 26 changeset = frozenset(changes) 27 if changeset in self.cache: 28 return False 29 elif not self.test(changes): 30 self.cache.add(changeset) 31 return False 32 else: 33 return True 34 35 def run(self, changes, force=False): 36 # Make sure the initial test passes, if not then (a) either 37 # the user doesn't expect monotonicity, and we may end up 38 # doing O(N^2) tests, or (b) the test is wrong. Avoid the 39 # O(N^2) case unless user requests it. 40 if not force: 41 if not self.getTestResult(changes): 42 raise ValueError("Initial test passed to delta fails.") 43 44 # Check empty set first to quickly find poor test functions. 45 if self.getTestResult(set()): 46 return set() 47 else: 48 return self.delta(changes, self.split(changes)) 49 50 def split(self, S): 51 """split(set) -> [sets] 52 53 Partition a set into one or two pieces. 54 """ 55 56 # There are many ways to split, we could do a better job with more 57 # context information (but then the API becomes grosser). 58 L = list(S) 59 mid = len(L) // 2 60 if mid == 0: 61 return (L,) 62 else: 63 return L[:mid], L[mid:] 64 65 def delta(self, c, sets): 66 # assert(reduce(set.union, sets, set()) == c) 67 68 # If there is nothing left we can remove, we are done. 69 if len(sets) <= 1: 70 return c 71 72 # Look for a passing subset. 73 res = self.search(c, sets) 74 if res is not None: 75 return res 76 77 # Otherwise, partition sets if possible; if not we are done. 78 refined = sum(map(list, map(self.split, sets)), []) 79 if len(refined) == len(sets): 80 return c 81 82 return self.delta(c, refined) 83 84 def search(self, c, sets): 85 for i, S in enumerate(sets): 86 # If test passes on this subset alone, recurse. 87 if self.getTestResult(S): 88 return self.delta(S, self.split(S)) 89 90 # Otherwise if we have more than two sets, see if test 91 # pases without this subset. 92 if len(sets) > 2: 93 complement = sum(sets[:i] + sets[i + 1 :], []) 94 if self.getTestResult(complement): 95 return self.delta(complement, sets[:i] + sets[i + 1 :]) 96 97 98### 99 100 101class Token(object): 102 def __init__(self, type, data, flags, file, line, column): 103 self.type = type 104 self.data = data 105 self.flags = flags 106 self.file = file 107 self.line = line 108 self.column = column 109 110 111kTokenRE = re.compile( 112 r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""", re.DOTALL | re.MULTILINE 113) 114 115 116def getTokens(path): 117 p = subprocess.Popen( 118 ["clang", "-dump-raw-tokens", path], 119 stdin=subprocess.PIPE, 120 stdout=subprocess.PIPE, 121 stderr=subprocess.PIPE, 122 ) 123 out, err = p.communicate() 124 125 tokens = [] 126 collect = None 127 for ln in err.split("\n"): 128 # Silly programmers refuse to print in simple machine readable 129 # formats. Whatever. 130 if collect is None: 131 collect = ln 132 else: 133 collect = collect + "\n" + ln 134 if "Loc=<" in ln and ln.endswith(">"): 135 ln, collect = collect, None 136 tokens.append(Token(*kTokenRE.match(ln).groups())) 137 138 return tokens 139 140 141### 142 143 144class TMBDDelta(DeltaAlgorithm): 145 def __init__(self, testProgram, tokenLists, log): 146 def patchName(name, suffix): 147 base, ext = os.path.splitext(name) 148 return base + "." + suffix + ext 149 150 super(TMBDDelta, self).__init__() 151 self.testProgram = testProgram 152 self.tokenLists = tokenLists 153 self.tempFiles = [patchName(f, "tmp") for f, _ in self.tokenLists] 154 self.targetFiles = [patchName(f, "ok") for f, _ in self.tokenLists] 155 self.log = log 156 self.numTests = 0 157 158 def writeFiles(self, changes, fileNames): 159 assert len(fileNames) == len(self.tokenLists) 160 byFile = [[] for i in self.tokenLists] 161 for i, j in changes: 162 byFile[i].append(j) 163 164 for i, (file, tokens) in enumerate(self.tokenLists): 165 f = open(fileNames[i], "w") 166 for j in byFile[i]: 167 f.write(tokens[j]) 168 f.close() 169 170 return byFile 171 172 def test(self, changes): 173 self.numTests += 1 174 175 byFile = self.writeFiles(changes, self.tempFiles) 176 177 if self.log: 178 print("TEST - ", end=" ", file=sys.stderr) 179 if self.log > 1: 180 for i, (file, _) in enumerate(self.tokenLists): 181 indices = byFile[i] 182 if i: 183 sys.stderr.write("\n ") 184 sys.stderr.write("%s:%d tokens: [" % (file, len(byFile[i]))) 185 prev = None 186 for j in byFile[i]: 187 if prev is None or j != prev + 1: 188 if prev: 189 sys.stderr.write("%d][" % prev) 190 sys.stderr.write(str(j)) 191 sys.stderr.write(":") 192 prev = j 193 if byFile[i]: 194 sys.stderr.write(str(byFile[i][-1])) 195 sys.stderr.write("] ") 196 else: 197 print( 198 ", ".join( 199 [ 200 "%s:%d tokens" % (file, len(byFile[i])) 201 for i, (file, _) in enumerate(self.tokenLists) 202 ] 203 ), 204 end=" ", 205 file=sys.stderr, 206 ) 207 208 p = subprocess.Popen([self.testProgram] + self.tempFiles) 209 res = p.wait() == 0 210 211 if res: 212 self.writeFiles(changes, self.targetFiles) 213 214 if self.log: 215 print("=> %s" % res, file=sys.stderr) 216 else: 217 if res: 218 print("\nSUCCESS (%d tokens)" % len(changes)) 219 else: 220 sys.stderr.write(".") 221 222 return res 223 224 def run(self): 225 res = super(TMBDDelta, self).run( 226 [ 227 (i, j) 228 for i, (file, tokens) in enumerate(self.tokenLists) 229 for j in range(len(tokens)) 230 ] 231 ) 232 self.writeFiles(res, self.targetFiles) 233 if not self.log: 234 print(file=sys.stderr) 235 return res 236 237 238def tokenBasedMultiDelta(program, files, log): 239 # Read in the lists of tokens. 240 tokenLists = [(file, [t.data for t in getTokens(file)]) for file in files] 241 242 numTokens = sum([len(tokens) for _, tokens in tokenLists]) 243 print("Delta on %s with %d tokens." % (", ".join(files), numTokens)) 244 245 tbmd = TMBDDelta(program, tokenLists, log) 246 247 res = tbmd.run() 248 249 print( 250 "Finished %s with %d tokens (in %d tests)." 251 % (", ".join(tbmd.targetFiles), len(res), tbmd.numTests) 252 ) 253 254 255def main(): 256 from optparse import OptionParser, OptionGroup 257 258 parser = OptionParser("%prog <test program> {files+}") 259 parser.add_option( 260 "", 261 "--debug", 262 dest="debugLevel", 263 help="set debug level [default %default]", 264 action="store", 265 type=int, 266 default=0, 267 ) 268 (opts, args) = parser.parse_args() 269 270 if len(args) <= 1: 271 parser.error("Invalid number of arguments.") 272 273 program, files = args[0], args[1:] 274 275 md = tokenBasedMultiDelta(program, files, log=opts.debugLevel) 276 277 278if __name__ == "__main__": 279 try: 280 main() 281 except KeyboardInterrupt: 282 print("Interrupted.", file=sys.stderr) 283 os._exit(1) # Avoid freeing our giant cache. 284