xref: /openbsd-src/gnu/llvm/clang/utils/token-delta.py (revision e5dd70708596ae51455a0ffa086a00c5b29f8583)
1*e5dd7070Spatrick#!/usr/bin/env python
2*e5dd7070Spatrick
3*e5dd7070Spatrickfrom __future__ import absolute_import, division, print_function
4*e5dd7070Spatrickimport os
5*e5dd7070Spatrickimport re
6*e5dd7070Spatrickimport subprocess
7*e5dd7070Spatrickimport sys
8*e5dd7070Spatrickimport tempfile
9*e5dd7070Spatrick
10*e5dd7070Spatrick###
11*e5dd7070Spatrick
12*e5dd7070Spatrickclass DeltaAlgorithm(object):
13*e5dd7070Spatrick    def __init__(self):
14*e5dd7070Spatrick        self.cache = set()
15*e5dd7070Spatrick
16*e5dd7070Spatrick    def test(self, changes):
17*e5dd7070Spatrick        abstract
18*e5dd7070Spatrick
19*e5dd7070Spatrick    ###
20*e5dd7070Spatrick
21*e5dd7070Spatrick    def getTestResult(self, changes):
22*e5dd7070Spatrick        # There is no reason to cache successful tests because we will
23*e5dd7070Spatrick        # always reduce the changeset when we see one.
24*e5dd7070Spatrick
25*e5dd7070Spatrick        changeset = frozenset(changes)
26*e5dd7070Spatrick        if changeset in self.cache:
27*e5dd7070Spatrick            return False
28*e5dd7070Spatrick        elif not self.test(changes):
29*e5dd7070Spatrick            self.cache.add(changeset)
30*e5dd7070Spatrick            return False
31*e5dd7070Spatrick        else:
32*e5dd7070Spatrick            return True
33*e5dd7070Spatrick
34*e5dd7070Spatrick    def run(self, changes, force=False):
35*e5dd7070Spatrick        # Make sure the initial test passes, if not then (a) either
36*e5dd7070Spatrick        # the user doesn't expect monotonicity, and we may end up
37*e5dd7070Spatrick        # doing O(N^2) tests, or (b) the test is wrong. Avoid the
38*e5dd7070Spatrick        # O(N^2) case unless user requests it.
39*e5dd7070Spatrick        if not force:
40*e5dd7070Spatrick            if not self.getTestResult(changes):
41*e5dd7070Spatrick                raise ValueError('Initial test passed to delta fails.')
42*e5dd7070Spatrick
43*e5dd7070Spatrick        # Check empty set first to quickly find poor test functions.
44*e5dd7070Spatrick        if self.getTestResult(set()):
45*e5dd7070Spatrick            return set()
46*e5dd7070Spatrick        else:
47*e5dd7070Spatrick            return self.delta(changes, self.split(changes))
48*e5dd7070Spatrick
49*e5dd7070Spatrick    def split(self, S):
50*e5dd7070Spatrick        """split(set) -> [sets]
51*e5dd7070Spatrick
52*e5dd7070Spatrick        Partition a set into one or two pieces.
53*e5dd7070Spatrick        """
54*e5dd7070Spatrick
55*e5dd7070Spatrick        # There are many ways to split, we could do a better job with more
56*e5dd7070Spatrick        # context information (but then the API becomes grosser).
57*e5dd7070Spatrick        L = list(S)
58*e5dd7070Spatrick        mid = len(L)//2
59*e5dd7070Spatrick        if mid==0:
60*e5dd7070Spatrick            return L,
61*e5dd7070Spatrick        else:
62*e5dd7070Spatrick            return L[:mid],L[mid:]
63*e5dd7070Spatrick
64*e5dd7070Spatrick    def delta(self, c, sets):
65*e5dd7070Spatrick        # assert(reduce(set.union, sets, set()) == c)
66*e5dd7070Spatrick
67*e5dd7070Spatrick        # If there is nothing left we can remove, we are done.
68*e5dd7070Spatrick        if len(sets) <= 1:
69*e5dd7070Spatrick            return c
70*e5dd7070Spatrick
71*e5dd7070Spatrick        # Look for a passing subset.
72*e5dd7070Spatrick        res = self.search(c, sets)
73*e5dd7070Spatrick        if res is not None:
74*e5dd7070Spatrick            return res
75*e5dd7070Spatrick
76*e5dd7070Spatrick        # Otherwise, partition sets if possible; if not we are done.
77*e5dd7070Spatrick        refined = sum(map(list, map(self.split, sets)), [])
78*e5dd7070Spatrick        if len(refined) == len(sets):
79*e5dd7070Spatrick            return c
80*e5dd7070Spatrick
81*e5dd7070Spatrick        return self.delta(c, refined)
82*e5dd7070Spatrick
83*e5dd7070Spatrick    def search(self, c, sets):
84*e5dd7070Spatrick        for i,S in enumerate(sets):
85*e5dd7070Spatrick            # If test passes on this subset alone, recurse.
86*e5dd7070Spatrick            if self.getTestResult(S):
87*e5dd7070Spatrick                return self.delta(S, self.split(S))
88*e5dd7070Spatrick
89*e5dd7070Spatrick            # Otherwise if we have more than two sets, see if test
90*e5dd7070Spatrick            # pases without this subset.
91*e5dd7070Spatrick            if len(sets) > 2:
92*e5dd7070Spatrick                complement = sum(sets[:i] + sets[i+1:],[])
93*e5dd7070Spatrick                if self.getTestResult(complement):
94*e5dd7070Spatrick                    return self.delta(complement, sets[:i] + sets[i+1:])
95*e5dd7070Spatrick
96*e5dd7070Spatrick###
97*e5dd7070Spatrick
98*e5dd7070Spatrickclass Token(object):
99*e5dd7070Spatrick    def __init__(self, type, data, flags, file, line, column):
100*e5dd7070Spatrick        self.type   = type
101*e5dd7070Spatrick        self.data   = data
102*e5dd7070Spatrick        self.flags  = flags
103*e5dd7070Spatrick        self.file   = file
104*e5dd7070Spatrick        self.line   = line
105*e5dd7070Spatrick        self.column = column
106*e5dd7070Spatrick
107*e5dd7070SpatrickkTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""",
108*e5dd7070Spatrick                      re.DOTALL | re.MULTILINE)
109*e5dd7070Spatrick
110*e5dd7070Spatrickdef getTokens(path):
111*e5dd7070Spatrick    p = subprocess.Popen(['clang','-dump-raw-tokens',path],
112*e5dd7070Spatrick                         stdin=subprocess.PIPE,
113*e5dd7070Spatrick                         stdout=subprocess.PIPE,
114*e5dd7070Spatrick                         stderr=subprocess.PIPE)
115*e5dd7070Spatrick    out,err = p.communicate()
116*e5dd7070Spatrick
117*e5dd7070Spatrick    tokens = []
118*e5dd7070Spatrick    collect = None
119*e5dd7070Spatrick    for ln in err.split('\n'):
120*e5dd7070Spatrick        # Silly programmers refuse to print in simple machine readable
121*e5dd7070Spatrick        # formats. Whatever.
122*e5dd7070Spatrick        if collect is None:
123*e5dd7070Spatrick            collect = ln
124*e5dd7070Spatrick        else:
125*e5dd7070Spatrick            collect = collect + '\n' + ln
126*e5dd7070Spatrick        if 'Loc=<' in ln and ln.endswith('>'):
127*e5dd7070Spatrick            ln,collect = collect,None
128*e5dd7070Spatrick            tokens.append(Token(*kTokenRE.match(ln).groups()))
129*e5dd7070Spatrick
130*e5dd7070Spatrick    return tokens
131*e5dd7070Spatrick
132*e5dd7070Spatrick###
133*e5dd7070Spatrick
134*e5dd7070Spatrickclass TMBDDelta(DeltaAlgorithm):
135*e5dd7070Spatrick    def __init__(self, testProgram, tokenLists, log):
136*e5dd7070Spatrick        def patchName(name, suffix):
137*e5dd7070Spatrick            base,ext = os.path.splitext(name)
138*e5dd7070Spatrick            return base + '.' + suffix + ext
139*e5dd7070Spatrick        super(TMBDDelta, self).__init__()
140*e5dd7070Spatrick        self.testProgram = testProgram
141*e5dd7070Spatrick        self.tokenLists = tokenLists
142*e5dd7070Spatrick        self.tempFiles = [patchName(f,'tmp')
143*e5dd7070Spatrick                            for f,_ in self.tokenLists]
144*e5dd7070Spatrick        self.targetFiles = [patchName(f,'ok')
145*e5dd7070Spatrick                            for f,_ in self.tokenLists]
146*e5dd7070Spatrick        self.log = log
147*e5dd7070Spatrick        self.numTests = 0
148*e5dd7070Spatrick
149*e5dd7070Spatrick    def writeFiles(self, changes, fileNames):
150*e5dd7070Spatrick        assert len(fileNames) == len(self.tokenLists)
151*e5dd7070Spatrick        byFile = [[] for i in self.tokenLists]
152*e5dd7070Spatrick        for i,j in changes:
153*e5dd7070Spatrick            byFile[i].append(j)
154*e5dd7070Spatrick
155*e5dd7070Spatrick        for i,(file,tokens) in enumerate(self.tokenLists):
156*e5dd7070Spatrick            f = open(fileNames[i],'w')
157*e5dd7070Spatrick            for j in byFile[i]:
158*e5dd7070Spatrick                f.write(tokens[j])
159*e5dd7070Spatrick            f.close()
160*e5dd7070Spatrick
161*e5dd7070Spatrick        return byFile
162*e5dd7070Spatrick
163*e5dd7070Spatrick    def test(self, changes):
164*e5dd7070Spatrick        self.numTests += 1
165*e5dd7070Spatrick
166*e5dd7070Spatrick        byFile = self.writeFiles(changes, self.tempFiles)
167*e5dd7070Spatrick
168*e5dd7070Spatrick        if self.log:
169*e5dd7070Spatrick            print('TEST - ', end=' ', file=sys.stderr)
170*e5dd7070Spatrick            if self.log > 1:
171*e5dd7070Spatrick                for i,(file,_) in enumerate(self.tokenLists):
172*e5dd7070Spatrick                    indices = byFile[i]
173*e5dd7070Spatrick                    if i:
174*e5dd7070Spatrick                        sys.stderr.write('\n      ')
175*e5dd7070Spatrick                    sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i])))
176*e5dd7070Spatrick                    prev = None
177*e5dd7070Spatrick                    for j in byFile[i]:
178*e5dd7070Spatrick                        if prev is None or j != prev + 1:
179*e5dd7070Spatrick                            if prev:
180*e5dd7070Spatrick                                sys.stderr.write('%d][' % prev)
181*e5dd7070Spatrick                            sys.stderr.write(str(j))
182*e5dd7070Spatrick                            sys.stderr.write(':')
183*e5dd7070Spatrick                        prev = j
184*e5dd7070Spatrick                    if byFile[i]:
185*e5dd7070Spatrick                        sys.stderr.write(str(byFile[i][-1]))
186*e5dd7070Spatrick                    sys.stderr.write('] ')
187*e5dd7070Spatrick            else:
188*e5dd7070Spatrick                print(', '.join(['%s:%d tokens' % (file, len(byFile[i]))
189*e5dd7070Spatrick                                               for i,(file,_) in enumerate(self.tokenLists)]), end=' ', file=sys.stderr)
190*e5dd7070Spatrick
191*e5dd7070Spatrick        p = subprocess.Popen([self.testProgram] + self.tempFiles)
192*e5dd7070Spatrick        res = p.wait() == 0
193*e5dd7070Spatrick
194*e5dd7070Spatrick        if res:
195*e5dd7070Spatrick            self.writeFiles(changes, self.targetFiles)
196*e5dd7070Spatrick
197*e5dd7070Spatrick        if self.log:
198*e5dd7070Spatrick            print('=> %s' % res, file=sys.stderr)
199*e5dd7070Spatrick        else:
200*e5dd7070Spatrick            if res:
201*e5dd7070Spatrick                print('\nSUCCESS (%d tokens)' % len(changes))
202*e5dd7070Spatrick            else:
203*e5dd7070Spatrick                sys.stderr.write('.')
204*e5dd7070Spatrick
205*e5dd7070Spatrick        return res
206*e5dd7070Spatrick
207*e5dd7070Spatrick    def run(self):
208*e5dd7070Spatrick        res = super(TMBDDelta, self).run([(i,j)
209*e5dd7070Spatrick                                          for i,(file,tokens) in enumerate(self.tokenLists)
210*e5dd7070Spatrick                                          for j in range(len(tokens))])
211*e5dd7070Spatrick        self.writeFiles(res, self.targetFiles)
212*e5dd7070Spatrick        if not self.log:
213*e5dd7070Spatrick            print(file=sys.stderr)
214*e5dd7070Spatrick        return res
215*e5dd7070Spatrick
216*e5dd7070Spatrickdef tokenBasedMultiDelta(program, files, log):
217*e5dd7070Spatrick    # Read in the lists of tokens.
218*e5dd7070Spatrick    tokenLists = [(file, [t.data for t in getTokens(file)])
219*e5dd7070Spatrick                  for file in files]
220*e5dd7070Spatrick
221*e5dd7070Spatrick    numTokens = sum([len(tokens) for _,tokens in tokenLists])
222*e5dd7070Spatrick    print("Delta on %s with %d tokens." % (', '.join(files), numTokens))
223*e5dd7070Spatrick
224*e5dd7070Spatrick    tbmd = TMBDDelta(program, tokenLists, log)
225*e5dd7070Spatrick
226*e5dd7070Spatrick    res = tbmd.run()
227*e5dd7070Spatrick
228*e5dd7070Spatrick    print("Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles),
229*e5dd7070Spatrick                                                         len(res),
230*e5dd7070Spatrick                                                         tbmd.numTests))
231*e5dd7070Spatrick
232*e5dd7070Spatrickdef main():
233*e5dd7070Spatrick    from optparse import OptionParser, OptionGroup
234*e5dd7070Spatrick    parser = OptionParser("%prog <test program> {files+}")
235*e5dd7070Spatrick    parser.add_option("", "--debug", dest="debugLevel",
236*e5dd7070Spatrick                     help="set debug level [default %default]",
237*e5dd7070Spatrick                     action="store", type=int, default=0)
238*e5dd7070Spatrick    (opts, args) = parser.parse_args()
239*e5dd7070Spatrick
240*e5dd7070Spatrick    if len(args) <= 1:
241*e5dd7070Spatrick        parser.error('Invalid number of arguments.')
242*e5dd7070Spatrick
243*e5dd7070Spatrick    program,files = args[0],args[1:]
244*e5dd7070Spatrick
245*e5dd7070Spatrick    md = tokenBasedMultiDelta(program, files, log=opts.debugLevel)
246*e5dd7070Spatrick
247*e5dd7070Spatrickif __name__ == '__main__':
248*e5dd7070Spatrick    try:
249*e5dd7070Spatrick        main()
250*e5dd7070Spatrick    except KeyboardInterrupt:
251*e5dd7070Spatrick        print('Interrupted.', file=sys.stderr)
252*e5dd7070Spatrick        os._exit(1) # Avoid freeing our giant cache.
253