xref: /netbsd-src/external/apache2/llvm/dist/clang/utils/token-delta.py (revision 7330f729ccf0bd976a06f95fad452fe774fc7fd1)
1*7330f729Sjoerg#!/usr/bin/env python
2*7330f729Sjoerg
3*7330f729Sjoergfrom __future__ import absolute_import, division, print_function
4*7330f729Sjoergimport os
5*7330f729Sjoergimport re
6*7330f729Sjoergimport subprocess
7*7330f729Sjoergimport sys
8*7330f729Sjoergimport tempfile
9*7330f729Sjoerg
10*7330f729Sjoerg###
11*7330f729Sjoerg
12*7330f729Sjoergclass DeltaAlgorithm(object):
13*7330f729Sjoerg    def __init__(self):
14*7330f729Sjoerg        self.cache = set()
15*7330f729Sjoerg
16*7330f729Sjoerg    def test(self, changes):
17*7330f729Sjoerg        abstract
18*7330f729Sjoerg
19*7330f729Sjoerg    ###
20*7330f729Sjoerg
21*7330f729Sjoerg    def getTestResult(self, changes):
22*7330f729Sjoerg        # There is no reason to cache successful tests because we will
23*7330f729Sjoerg        # always reduce the changeset when we see one.
24*7330f729Sjoerg
25*7330f729Sjoerg        changeset = frozenset(changes)
26*7330f729Sjoerg        if changeset in self.cache:
27*7330f729Sjoerg            return False
28*7330f729Sjoerg        elif not self.test(changes):
29*7330f729Sjoerg            self.cache.add(changeset)
30*7330f729Sjoerg            return False
31*7330f729Sjoerg        else:
32*7330f729Sjoerg            return True
33*7330f729Sjoerg
34*7330f729Sjoerg    def run(self, changes, force=False):
35*7330f729Sjoerg        # Make sure the initial test passes, if not then (a) either
36*7330f729Sjoerg        # the user doesn't expect monotonicity, and we may end up
37*7330f729Sjoerg        # doing O(N^2) tests, or (b) the test is wrong. Avoid the
38*7330f729Sjoerg        # O(N^2) case unless user requests it.
39*7330f729Sjoerg        if not force:
40*7330f729Sjoerg            if not self.getTestResult(changes):
41*7330f729Sjoerg                raise ValueError('Initial test passed to delta fails.')
42*7330f729Sjoerg
43*7330f729Sjoerg        # Check empty set first to quickly find poor test functions.
44*7330f729Sjoerg        if self.getTestResult(set()):
45*7330f729Sjoerg            return set()
46*7330f729Sjoerg        else:
47*7330f729Sjoerg            return self.delta(changes, self.split(changes))
48*7330f729Sjoerg
49*7330f729Sjoerg    def split(self, S):
50*7330f729Sjoerg        """split(set) -> [sets]
51*7330f729Sjoerg
52*7330f729Sjoerg        Partition a set into one or two pieces.
53*7330f729Sjoerg        """
54*7330f729Sjoerg
55*7330f729Sjoerg        # There are many ways to split, we could do a better job with more
56*7330f729Sjoerg        # context information (but then the API becomes grosser).
57*7330f729Sjoerg        L = list(S)
58*7330f729Sjoerg        mid = len(L)//2
59*7330f729Sjoerg        if mid==0:
60*7330f729Sjoerg            return L,
61*7330f729Sjoerg        else:
62*7330f729Sjoerg            return L[:mid],L[mid:]
63*7330f729Sjoerg
64*7330f729Sjoerg    def delta(self, c, sets):
65*7330f729Sjoerg        # assert(reduce(set.union, sets, set()) == c)
66*7330f729Sjoerg
67*7330f729Sjoerg        # If there is nothing left we can remove, we are done.
68*7330f729Sjoerg        if len(sets) <= 1:
69*7330f729Sjoerg            return c
70*7330f729Sjoerg
71*7330f729Sjoerg        # Look for a passing subset.
72*7330f729Sjoerg        res = self.search(c, sets)
73*7330f729Sjoerg        if res is not None:
74*7330f729Sjoerg            return res
75*7330f729Sjoerg
76*7330f729Sjoerg        # Otherwise, partition sets if possible; if not we are done.
77*7330f729Sjoerg        refined = sum(map(list, map(self.split, sets)), [])
78*7330f729Sjoerg        if len(refined) == len(sets):
79*7330f729Sjoerg            return c
80*7330f729Sjoerg
81*7330f729Sjoerg        return self.delta(c, refined)
82*7330f729Sjoerg
83*7330f729Sjoerg    def search(self, c, sets):
84*7330f729Sjoerg        for i,S in enumerate(sets):
85*7330f729Sjoerg            # If test passes on this subset alone, recurse.
86*7330f729Sjoerg            if self.getTestResult(S):
87*7330f729Sjoerg                return self.delta(S, self.split(S))
88*7330f729Sjoerg
89*7330f729Sjoerg            # Otherwise if we have more than two sets, see if test
90*7330f729Sjoerg            # pases without this subset.
91*7330f729Sjoerg            if len(sets) > 2:
92*7330f729Sjoerg                complement = sum(sets[:i] + sets[i+1:],[])
93*7330f729Sjoerg                if self.getTestResult(complement):
94*7330f729Sjoerg                    return self.delta(complement, sets[:i] + sets[i+1:])
95*7330f729Sjoerg
96*7330f729Sjoerg###
97*7330f729Sjoerg
98*7330f729Sjoergclass Token(object):
99*7330f729Sjoerg    def __init__(self, type, data, flags, file, line, column):
100*7330f729Sjoerg        self.type   = type
101*7330f729Sjoerg        self.data   = data
102*7330f729Sjoerg        self.flags  = flags
103*7330f729Sjoerg        self.file   = file
104*7330f729Sjoerg        self.line   = line
105*7330f729Sjoerg        self.column = column
106*7330f729Sjoerg
107*7330f729SjoergkTokenRE = re.compile(r"""([a-z_]+) '(.*)'\t(.*)\tLoc=<(.*):(.*):(.*)>""",
108*7330f729Sjoerg                      re.DOTALL | re.MULTILINE)
109*7330f729Sjoerg
110*7330f729Sjoergdef getTokens(path):
111*7330f729Sjoerg    p = subprocess.Popen(['clang','-dump-raw-tokens',path],
112*7330f729Sjoerg                         stdin=subprocess.PIPE,
113*7330f729Sjoerg                         stdout=subprocess.PIPE,
114*7330f729Sjoerg                         stderr=subprocess.PIPE)
115*7330f729Sjoerg    out,err = p.communicate()
116*7330f729Sjoerg
117*7330f729Sjoerg    tokens = []
118*7330f729Sjoerg    collect = None
119*7330f729Sjoerg    for ln in err.split('\n'):
120*7330f729Sjoerg        # Silly programmers refuse to print in simple machine readable
121*7330f729Sjoerg        # formats. Whatever.
122*7330f729Sjoerg        if collect is None:
123*7330f729Sjoerg            collect = ln
124*7330f729Sjoerg        else:
125*7330f729Sjoerg            collect = collect + '\n' + ln
126*7330f729Sjoerg        if 'Loc=<' in ln and ln.endswith('>'):
127*7330f729Sjoerg            ln,collect = collect,None
128*7330f729Sjoerg            tokens.append(Token(*kTokenRE.match(ln).groups()))
129*7330f729Sjoerg
130*7330f729Sjoerg    return tokens
131*7330f729Sjoerg
132*7330f729Sjoerg###
133*7330f729Sjoerg
134*7330f729Sjoergclass TMBDDelta(DeltaAlgorithm):
135*7330f729Sjoerg    def __init__(self, testProgram, tokenLists, log):
136*7330f729Sjoerg        def patchName(name, suffix):
137*7330f729Sjoerg            base,ext = os.path.splitext(name)
138*7330f729Sjoerg            return base + '.' + suffix + ext
139*7330f729Sjoerg        super(TMBDDelta, self).__init__()
140*7330f729Sjoerg        self.testProgram = testProgram
141*7330f729Sjoerg        self.tokenLists = tokenLists
142*7330f729Sjoerg        self.tempFiles = [patchName(f,'tmp')
143*7330f729Sjoerg                            for f,_ in self.tokenLists]
144*7330f729Sjoerg        self.targetFiles = [patchName(f,'ok')
145*7330f729Sjoerg                            for f,_ in self.tokenLists]
146*7330f729Sjoerg        self.log = log
147*7330f729Sjoerg        self.numTests = 0
148*7330f729Sjoerg
149*7330f729Sjoerg    def writeFiles(self, changes, fileNames):
150*7330f729Sjoerg        assert len(fileNames) == len(self.tokenLists)
151*7330f729Sjoerg        byFile = [[] for i in self.tokenLists]
152*7330f729Sjoerg        for i,j in changes:
153*7330f729Sjoerg            byFile[i].append(j)
154*7330f729Sjoerg
155*7330f729Sjoerg        for i,(file,tokens) in enumerate(self.tokenLists):
156*7330f729Sjoerg            f = open(fileNames[i],'w')
157*7330f729Sjoerg            for j in byFile[i]:
158*7330f729Sjoerg                f.write(tokens[j])
159*7330f729Sjoerg            f.close()
160*7330f729Sjoerg
161*7330f729Sjoerg        return byFile
162*7330f729Sjoerg
163*7330f729Sjoerg    def test(self, changes):
164*7330f729Sjoerg        self.numTests += 1
165*7330f729Sjoerg
166*7330f729Sjoerg        byFile = self.writeFiles(changes, self.tempFiles)
167*7330f729Sjoerg
168*7330f729Sjoerg        if self.log:
169*7330f729Sjoerg            print('TEST - ', end=' ', file=sys.stderr)
170*7330f729Sjoerg            if self.log > 1:
171*7330f729Sjoerg                for i,(file,_) in enumerate(self.tokenLists):
172*7330f729Sjoerg                    indices = byFile[i]
173*7330f729Sjoerg                    if i:
174*7330f729Sjoerg                        sys.stderr.write('\n      ')
175*7330f729Sjoerg                    sys.stderr.write('%s:%d tokens: [' % (file,len(byFile[i])))
176*7330f729Sjoerg                    prev = None
177*7330f729Sjoerg                    for j in byFile[i]:
178*7330f729Sjoerg                        if prev is None or j != prev + 1:
179*7330f729Sjoerg                            if prev:
180*7330f729Sjoerg                                sys.stderr.write('%d][' % prev)
181*7330f729Sjoerg                            sys.stderr.write(str(j))
182*7330f729Sjoerg                            sys.stderr.write(':')
183*7330f729Sjoerg                        prev = j
184*7330f729Sjoerg                    if byFile[i]:
185*7330f729Sjoerg                        sys.stderr.write(str(byFile[i][-1]))
186*7330f729Sjoerg                    sys.stderr.write('] ')
187*7330f729Sjoerg            else:
188*7330f729Sjoerg                print(', '.join(['%s:%d tokens' % (file, len(byFile[i]))
189*7330f729Sjoerg                                               for i,(file,_) in enumerate(self.tokenLists)]), end=' ', file=sys.stderr)
190*7330f729Sjoerg
191*7330f729Sjoerg        p = subprocess.Popen([self.testProgram] + self.tempFiles)
192*7330f729Sjoerg        res = p.wait() == 0
193*7330f729Sjoerg
194*7330f729Sjoerg        if res:
195*7330f729Sjoerg            self.writeFiles(changes, self.targetFiles)
196*7330f729Sjoerg
197*7330f729Sjoerg        if self.log:
198*7330f729Sjoerg            print('=> %s' % res, file=sys.stderr)
199*7330f729Sjoerg        else:
200*7330f729Sjoerg            if res:
201*7330f729Sjoerg                print('\nSUCCESS (%d tokens)' % len(changes))
202*7330f729Sjoerg            else:
203*7330f729Sjoerg                sys.stderr.write('.')
204*7330f729Sjoerg
205*7330f729Sjoerg        return res
206*7330f729Sjoerg
207*7330f729Sjoerg    def run(self):
208*7330f729Sjoerg        res = super(TMBDDelta, self).run([(i,j)
209*7330f729Sjoerg                                          for i,(file,tokens) in enumerate(self.tokenLists)
210*7330f729Sjoerg                                          for j in range(len(tokens))])
211*7330f729Sjoerg        self.writeFiles(res, self.targetFiles)
212*7330f729Sjoerg        if not self.log:
213*7330f729Sjoerg            print(file=sys.stderr)
214*7330f729Sjoerg        return res
215*7330f729Sjoerg
216*7330f729Sjoergdef tokenBasedMultiDelta(program, files, log):
217*7330f729Sjoerg    # Read in the lists of tokens.
218*7330f729Sjoerg    tokenLists = [(file, [t.data for t in getTokens(file)])
219*7330f729Sjoerg                  for file in files]
220*7330f729Sjoerg
221*7330f729Sjoerg    numTokens = sum([len(tokens) for _,tokens in tokenLists])
222*7330f729Sjoerg    print("Delta on %s with %d tokens." % (', '.join(files), numTokens))
223*7330f729Sjoerg
224*7330f729Sjoerg    tbmd = TMBDDelta(program, tokenLists, log)
225*7330f729Sjoerg
226*7330f729Sjoerg    res = tbmd.run()
227*7330f729Sjoerg
228*7330f729Sjoerg    print("Finished %s with %d tokens (in %d tests)." % (', '.join(tbmd.targetFiles),
229*7330f729Sjoerg                                                         len(res),
230*7330f729Sjoerg                                                         tbmd.numTests))
231*7330f729Sjoerg
232*7330f729Sjoergdef main():
233*7330f729Sjoerg    from optparse import OptionParser, OptionGroup
234*7330f729Sjoerg    parser = OptionParser("%prog <test program> {files+}")
235*7330f729Sjoerg    parser.add_option("", "--debug", dest="debugLevel",
236*7330f729Sjoerg                     help="set debug level [default %default]",
237*7330f729Sjoerg                     action="store", type=int, default=0)
238*7330f729Sjoerg    (opts, args) = parser.parse_args()
239*7330f729Sjoerg
240*7330f729Sjoerg    if len(args) <= 1:
241*7330f729Sjoerg        parser.error('Invalid number of arguments.')
242*7330f729Sjoerg
243*7330f729Sjoerg    program,files = args[0],args[1:]
244*7330f729Sjoerg
245*7330f729Sjoerg    md = tokenBasedMultiDelta(program, files, log=opts.debugLevel)
246*7330f729Sjoerg
247*7330f729Sjoergif __name__ == '__main__':
248*7330f729Sjoerg    try:
249*7330f729Sjoerg        main()
250*7330f729Sjoerg    except KeyboardInterrupt:
251*7330f729Sjoerg        print('Interrupted.', file=sys.stderr)
252*7330f729Sjoerg        os._exit(1) # Avoid freeing our giant cache.
253