xref: /freebsd-src/contrib/cortex-strings/scripts/bench.py (revision 8c4282b370bd66908b45b6a223226a9fc2b69d57)
1*09a53ad8SAndrew Turner#!/usr/bin/env python
2*09a53ad8SAndrew Turner
3*09a53ad8SAndrew Turner"""Simple harness that benchmarks different variants of the routines,
4*09a53ad8SAndrew Turnercaches the results, and emits all of the records at the end.
5*09a53ad8SAndrew Turner
6*09a53ad8SAndrew TurnerResults are generated for different values of:
7*09a53ad8SAndrew Turner * Source
8*09a53ad8SAndrew Turner * Routine
9*09a53ad8SAndrew Turner * Length
10*09a53ad8SAndrew Turner * Alignment
11*09a53ad8SAndrew Turner"""
12*09a53ad8SAndrew Turner
13*09a53ad8SAndrew Turnerimport argparse
14*09a53ad8SAndrew Turnerimport subprocess
15*09a53ad8SAndrew Turnerimport math
16*09a53ad8SAndrew Turnerimport sys
17*09a53ad8SAndrew Turner
# Common path prefix of the benchmark executables; the variant name is
# appended to it to form the program that gets run.
build = '../build/try-'

# Routine list shared by the 'bionic-c', 'glibc-c' and 'newlib-c'
# entries of HAS.
ALL = 'memchr memcmp memcpy memset strchr strcmp strcpy strlen'

# Maps each variant name to a space-separated list of the routines that
# are benchmarked for it.
HAS = {
    'this':          'bounce memchr memcpy memset strchr strcmp strcpy strlen',
    'bionic-a9':     'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-a15':    'memcmp memcpy memset strcmp strcpy strlen',
    'bionic-c':      ALL,
    'csl':           'memcpy memset',
    'glibc':         'memcpy memset strchr strlen',
    'glibc-c':       ALL,
    'newlib':        'memcpy strcmp strcpy strlen',
    'newlib-c':      ALL,
    'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen',
    'plain':         'memset memcpy strcmp strcpy',
}

# Alignment arguments handed to the executables via '-a'.
BOUNCE_ALIGNMENTS = ['1']
SINGLE_BUFFER_ALIGNMENTS = ['1', '2', '4', '8', '16', '32']
DUAL_BUFFER_ALIGNMENTS = ['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']

# Maps each routine to the list of alignments it is run with.  Routines
# in the same group share one list object.
ALIGNMENTS = {'bounce': BOUNCE_ALIGNMENTS}
ALIGNMENTS.update(dict.fromkeys(
    ['memchr', 'memset', 'strchr', 'strlen'], SINGLE_BUFFER_ALIGNMENTS))
ALIGNMENTS.update(dict.fromkeys(
    ['memcmp', 'memcpy', 'strcmp', 'strcpy'], DUAL_BUFFER_ALIGNMENTS))

# Sorted names, used as the defaults and choices of the command line.
VARIANTS = sorted(HAS)
FUNCTIONS = sorted(ALIGNMENTS)

# Number of run ids (0 .. NUM_RUNS-1) executed for every size.
NUM_RUNS = 5
57*09a53ad8SAndrew Turner
def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False):
    """Perform a single run, exercising the cache as appropriate.

    The run is looked up in `cache` under a key made of the colon-joined
    arguments (variant, function, bytes, loops, alignment, run_id).  On
    a miss the executable `build + variant` is invoked with the
    -t/-c/-l/-a/-r options and its stripped standard output becomes the
    record for this run.

    The record is stored back into `cache` under the key, printed to
    stdout unless `quiet` is set, and its eighth colon-separated field
    is returned as a float (the callers treat it as the time taken).

    Raises AssertionError if the executable could not be started.
    """
    key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id))

    if key in cache:
        got = cache[key]
    else:
        cmd = '%s%s -t %s -c %s -l %s -a %s -r %s' % (
            build, variant, function, bytes, loops, alignment, run_id)

        try:
            # universal_newlines makes the output a text string on both
            # Python 2 and Python 3, so it can be split on ':' below.
            got = subprocess.check_output(
                cmd.split(), universal_newlines=True).strip()
        except OSError as ex:
            # Raised explicitly rather than via 'assert' so the failure
            # is still reported when Python runs with -O.
            raise AssertionError('Error %s while running %s' % (ex, cmd))

    parts = got.split(':')
    took = float(parts[7])

    # Only remember the record once it has parsed successfully.
    cache[key] = got

    if not quiet:
        print(got)
        sys.stdout.flush()

    return took
83*09a53ad8SAndrew Turner
def run_many(cache, variants, bytes, all_functions):
    """Run every requested function/alignment/variant/size combination.

    cache         -- dict of earlier results, passed through to run().
    variants      -- names of the variants (keys of HAS) to benchmark.
    bytes         -- iterable of sizes to benchmark; nothing is run if
                     it is empty.
    all_functions -- functions to benchmark.  If empty, the functions of
                     HAS['this'] are used first, followed by any others
                     found in HAS.

    For each combination a quiet tracer run at a representative size is
    used to scale the loop count, then every size is run NUM_RUNS times.
    """
    # We want the data to come out in a useful order.  So fix an
    # alignment and function, and do all sizes for a variant first
    bytes = sorted(bytes)

    if not bytes:
        # No sizes requested; indexing below would fail on an empty list.
        return

    # Representative size for the tracer run: two thirds of the way up
    # the sorted list.
    mid = bytes[int(len(bytes)/1.5)]

    if not all_functions:
        # Use the ordering in 'this' as the default
        all_functions = HAS['this'].split()

        # Find all other functions
        for functions in HAS.values():
            for function in functions.split():
                if function not in all_functions:
                    all_functions.append(function)

    for function in all_functions:
        for alignment in ALIGNMENTS[function]:
            for variant in variants:
                if function not in HAS[variant].split():
                    continue

                # Run a tracer through and see how long it takes and
                # adjust the number of loops based on that.  Not great
                # for memchr() and similar which are O(n), but it will
                # do
                f = 50000000
                want = 5.0

                loops = int(f / math.sqrt(max(1, mid)))
                took = run(cache, variant, function, mid, loops, alignment, 0,
                           quiet=True)

                # Keep it reasonable for silly routines like bounce.
                # A reading of exactly zero would divide by zero; use
                # the upper clamp, which is the limit as took approaches
                # zero from above.
                if took == 0:
                    factor = 20
                else:
                    factor = min(20, max(0.05, want/took))
                f = f * factor

                # Round f to a few significant figures
                scale = 10**int(math.log10(f) - 1)
                f = scale*int(f/scale)

                # bytes is already sorted above
                for b in bytes:
                    # Figure out the number of loops to give a roughly consistent run
                    loops = int(f / math.sqrt(max(1, b)))
                    for run_id in range(0, NUM_RUNS):
                        run(cache, variant, function, b, loops, alignment,
                            run_id)
130*09a53ad8SAndrew Turner
def run_top(cache):
    """Parse the command line, build the list of sizes, and run them.

    The sizes are the integer parts of successive powers of 2 and of
    1.4, each series going up to approximately the --limit value.  The
    selected variants and functions, together with the sizes, are handed
    to run_many() along with `cache`.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-v", "--variants", nargs="+", choices=VARIANTS, default=VARIANTS,
        help="library variant to run (run all if not specified)")
    parser.add_argument(
        "-f", "--functions", nargs="+", choices=FUNCTIONS, default=FUNCTIONS,
        help="function to run (run all if not specified)")
    parser.add_argument(
        "-l", "--limit", type=int, default=512*1024,
        help="upper limit to test to (in bytes)")
    options = parser.parse_args()

    # All powers of 2 first, then the intermediate powers of 1.4
    ratios = (2.0, 1.4)
    log_limit = math.log(options.limit)

    sizes = []

    for ratio in ratios:
        # Number of multiplications by ratio needed to reach the top
        top = int(round(log_limit / math.log(ratio)))
        for exponent in range(top + 1):
            sizes.append(int(ratio**exponent))

    run_many(cache, options.variants, sizes, options.functions)
152*09a53ad8SAndrew Turner
def main():
    """Load the result cache, run the benchmarks, and save the cache.

    Records are read from 'cache.txt' in the current directory, one per
    line, keyed by their first seven colon-separated fields.  The cache
    is written back, sorted, even if the benchmark run raises.
    """
    cachename = 'cache.txt'

    cache = {}

    try:
        with open(cachename) as f:
            for line in f:
                line = line.strip()
                parts = line.split(':')
                # NOTE(review): run() builds its key from six arguments,
                # which gives seven fields only when the alignment
                # argument itself contains a colon -- confirm against
                # the executables' output format.
                cache[':'.join(parts[:7])] = line
    except (IOError, OSError):
        # A missing or unreadable cache file just means starting empty.
        # Anything else (including Ctrl-C) is allowed to propagate.
        pass

    try:
        run_top(cache)
    finally:
        with open(cachename, 'w') as f:
            for line in sorted(cache.values()):
                f.write(line + '\n')
173*09a53ad8SAndrew Turner
# Script entry point: run the benchmarks only when executed directly.
if __name__ == "__main__":
    main()
176