xref: /netbsd-src/external/bsd/zstd/dist/tests/test-zstd-versions.py (revision 3117ece4fc4a4ca4489ba793710b60b0d26bab6c)
1*3117ece4Schristos#!/usr/bin/env python3
2*3117ece4Schristos"""Test zstd interoperability between versions"""
3*3117ece4Schristos
4*3117ece4Schristos# ################################################################
5*3117ece4Schristos# Copyright (c) Meta Platforms, Inc. and affiliates.
6*3117ece4Schristos# All rights reserved.
7*3117ece4Schristos#
8*3117ece4Schristos# This source code is licensed under both the BSD-style license (found in the
9*3117ece4Schristos# LICENSE file in the root directory of this source tree) and the GPLv2 (found
10*3117ece4Schristos# in the COPYING file in the root directory of this source tree).
11*3117ece4Schristos# You may select, at your option, one of the above-listed licenses.
12*3117ece4Schristos# ################################################################
13*3117ece4Schristos
14*3117ece4Schristosimport filecmp
15*3117ece4Schristosimport glob
16*3117ece4Schristosimport hashlib
17*3117ece4Schristosimport os
18*3117ece4Schristosimport shutil
19*3117ece4Schristosimport sys
20*3117ece4Schristosimport subprocess
21*3117ece4Schristosfrom subprocess import Popen, PIPE
22*3117ece4Schristos
# --- external tools ---------------------------------------------------------
git_cmd = 'git'
make_cmd = 'make'
make_args = ['-j', 'CFLAGS=-O0']

# --- repository / working-directory layout ----------------------------------
repo_url = 'https://github.com/facebook/zstd.git'
tmp_dir_name = 'tests/versionsTest'

# Pseudo-tag used for the binary built from the current working tree.
head = 'vdevel'

# --- test payload -------------------------------------------------------------
test_dat_src = 'README.md'   # file (relative to the repository root) used as sample
test_dat = 'test_dat'        # name of the sample copy inside the working directory

# --- dictionary training material ---------------------------------------------
dict_source = 'dict_source'  # sub-directory that receives the training files
# Glob patterns (relative to the repository root) selecting the training files.
dict_globs = [
    'programs/*.c',
    'lib/common/*.c',
    'lib/compress/*.c',
    'lib/decompress/*.c',
    'lib/dictBuilder/*.c',
    'lib/legacy/*.c',
    'programs/*.h',
    'lib/common/*.h',
    'lib/compress/*.h',
    'lib/dictBuilder/*.h',
    'lib/legacy/*.h',
]
45*3117ece4Schristos
46*3117ece4Schristos
def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run *command* to completion and return its exit status.

    Args:
        command: Argument list, or a single string when ``param_shell`` is true.
        print_output: When true, print the captured stdout and then stderr.
        print_error: When true (and ``print_output`` is false), print the
            captured stderr if the command exits with a non-zero status.
        param_shell: Forwarded to ``Popen`` as ``shell``.

    Returns:
        The exit status of the process.
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    stdout_bytes, stderr_bytes = popen.communicate()
    # Tool output is not guaranteed to be valid UTF-8; replace undecodable
    # bytes instead of raising, so the exit status is still returned.
    stderr_text = stderr_bytes.decode("utf-8", errors="replace")
    stdout_text = stdout_bytes.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode != 0:
        # communicate() has returned, so returncode is always set here.
        print(stderr_text)
    return popen.returncode
59*3117ece4Schristos
60*3117ece4Schristos
def proc(cmd_args, pipe=True, dummy=False):
    """Run ``cmd_args`` and return the ``(stdout, stderr)`` pair from communicate().

    With ``pipe`` true both streams are captured as bytes; otherwise the child
    inherits this process's streams and both tuple members are ``None``.
    With ``dummy`` true nothing is executed and ``None`` is returned.
    """
    if dummy:
        return None
    capture = {'stdout': PIPE, 'stderr': PIPE} if pipe else {}
    child = Popen(cmd_args, **capture)
    return child.communicate()
69*3117ece4Schristos
70*3117ece4Schristos
def make(targets, pipe=True):
    """Invoke make with the module-wide arguments followed by ``targets``.

    The full command is printed before it runs. Returns whatever ``proc``
    returns for that command.
    """
    command = [make_cmd] + make_args + targets
    print('compilation command : {}'.format(command))
    return proc(command, pipe)
76*3117ece4Schristos
77*3117ece4Schristos
def git(args, pipe=True):
    """Run git with the argument list ``args`` and return the result of ``proc``."""
    command = [git_cmd] + args
    return proc(command, pipe)
80*3117ece4Schristos
81*3117ece4Schristos
def get_git_tags():
    """Return the tags listed by git for the pattern ``v[0-9].[0-9].[0-9]``.

    The listing is taken from the repository in the current working directory
    and split on whitespace.
    """
    listing, _ = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    return listing.decode('utf-8').split()
86*3117ece4Schristos
87*3117ece4Schristos
def dict_ok(tag, dict_name, sample):
    """Report whether ``./zstd.<tag>`` can compress ``sample`` with ``dict_name``.

    Args:
        tag: Version tag selecting the ``./zstd.<tag>`` binary in the current
            working directory.
        dict_name: Path of the dictionary file to validate.
        sample: Path of a file fed to the binary on stdin.

    Returns:
        True when the dictionary file exists and the command exits with
        status 0; False when the dictionary is missing, the sample or binary
        cannot be opened or started, or the command fails.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as sample_in:
            subprocess.check_call(cmd, stdin=sample_in,
                                  stdout=subprocess.DEVNULL,
                                  stderr=subprocess.DEVNULL)
    except (OSError, subprocess.SubprocessError):
        # Only launch/IO failures and a non-zero exit mean "not usable";
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        return False
    return True
98*3117ece4Schristos
99*3117ece4Schristos
def create_dict(tag, dict_source_path, fallback_tag=None):
    """Create ``dict.<tag>`` in the current directory unless it already exists.

    The dictionary is trained on the ``*.c`` and ``*.h`` files found in
    ``dict_source_path``, using ``./dictBuilder.<tag>`` for v0.5.0 and
    ``./zstd.<tag> --train`` for every other tag.

    Args:
        tag: Version tag selecting the binary and naming the dictionary.
        dict_source_path: Directory holding the training samples.
        fallback_tag: If training or validation fails, ``dict.<fallback_tag>``
            is copied to ``dict.<tag>`` instead.

    Raises:
        RuntimeError: Training or validation failed and no fallback was given.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    c_files = glob.glob(dict_source_path + "/*.c")
    h_files = glob.glob(dict_source_path + "/*.h")
    # Ensure the dictionary builder is deterministic
    files = sorted(c_files + h_files)

    # Pass an argument list rather than a shell string, so sample paths that
    # contain spaces or shell metacharacters are handed over unchanged.
    if tag == 'v0.5.0':
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]
    try:
        result = execute(cmd, print_output=False)
    except OSError as err:
        # A missing or non-executable binary counts as a training failure,
        # so the fallback below still applies (the shell used to report 127).
        print(str(err))
        result = 127

    # `files` may be empty; check it before indexing the validation sample.
    if result == 0 and files and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')
121*3117ece4Schristos
122*3117ece4Schristos
def zstd(tag, args, input_file, output_file):
    """
    Run ``./zstd.<tag>`` with ``args``, reading input_file on stdin and
    writing stdout to output_file.
    Need this helper because 0.5.0 is broken when stdout is not a TTY.

    The command line and the captured stderr are printed.
    Throws subprocess.CalledProcessError if the command returns non-zero.
    """
    with open(input_file, "rb") as i:
        with open(output_file, "wb") as o:
            cmd = ['./zstd.' + tag] + args
            print("Running: '{}', input={}, output={}" .format(
                ' '.join(cmd), input_file, output_file
            ))
            result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
    # Diagnostics may contain non-ASCII bytes (file names, for instance);
    # decode leniently so the exit status check below is always reached.
    print("Stderr: {}".format(result.stderr.decode("utf-8", errors="replace")))
    result.check_returncode()
138*3117ece4Schristos
139*3117ece4Schristos
def dict_compress_sample(tag, sample):
    """Compress ``sample`` with ``dict.<tag>`` at levels 1, 3, 5, 9, 15 and 18.

    Each result is written to ``<sample>_<LL>_64_<tag>_dictio.zst`` where
    ``LL`` is the zero-padded level. Level 1 is run with three ``-v`` flags.
    """
    dict_name = 'dict.' + tag
    for level in (1, 3, 5, 9, 15, 18):
        args = ['-D', dict_name, '-{}'.format(level)]
        if level == 1:
            args += ['-v', '-v', '-v']
        target = '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag)
        zstd(tag, args, sample, target)
    print(tag + " : dict compression completed")
152*3117ece4Schristos
153*3117ece4Schristos
def compress_sample(tag, sample):
    """Compress ``sample`` without a dictionary at levels 1, 3, 5, 9, 15 and 18.

    Each result is written to ``<sample>_<LL>_64_<tag>_nodict.zst`` where
    ``LL`` is the zero-padded level.
    """
    for level in (1, 3, 5, 9, 15, 18):
        target = '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag)
        zstd(tag, ['-{}'.format(level)], sample, target)
    print(tag + " : compression completed")
164*3117ece4Schristos
165*3117ece4Schristos
166*3117ece4Schristos# https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the contents of ``filepath``."""
    digest = hashlib.sha1()
    with open(filepath, 'rb') as stream:
        # Feed the hash block by block; the digest equals that of one full read.
        for block in iter(lambda: stream.read(65536), b''):
            digest.update(block)
    return digest.hexdigest()
170*3117ece4Schristos
171*3117ece4Schristos
def remove_duplicates():
    """Delete ``*.zst`` files in the current directory that duplicate another.

    Files are visited in sorted order; for each group of byte-identical files
    the first one is kept and every later one is removed, with a message
    printed for each removal.
    """
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            # Already removed as a duplicate of an earlier file.
            continue
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False forces a content comparison. The default only
            # compares type/size/mtime when those match, which would delete
            # distinct files that happen to share size and timestamp.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))
184*3117ece4Schristos
185*3117ece4Schristos
def decompress_zst(tag):
    """Decompress every ``*_nodict.zst`` file with ``./zstd.<tag>`` and verify it.

    Each output is written to ``<file>_d64_<tag>.dec`` and compared with the
    reference sample ``test_dat``.

    Raises:
        RuntimeError: A decompressed file differs from the reference sample.
    """
    for file_zst in sorted(glob.glob('*_nodict.zst')):
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # shallow=False: always compare the bytes. The default can report a
        # match from os.stat() data alone, without reading either file.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK     ')
197*3117ece4Schristos
198*3117ece4Schristos
def decompress_dict(tag):
    """Decompress every ``*_dictio.zst`` file with ``./zstd.<tag>`` and verify it.

    The dictionary for each file is chosen from the tag embedded in its name
    (``dict.<that tag>``). When ``tag`` is v0.6.0, files whose embedded tag
    sorts before v0.6.0 are skipped. Each output is written to
    ``<file>_d64_<tag>.dec`` and compared with the reference sample ``test_dat``.

    Raises:
        RuntimeError: A decompressed file differs from the reference sample.
    """
    suffix = '_dictio.zst'
    for file_zst in sorted(glob.glob('*' + suffix)):
        stem = file_zst[:-len(suffix)]  # remove "_dictio.zst"
        if head in stem:  # find vdevel
            dict_tag = head
        else:
            # The producing tag is the trailing "v..." component of the name.
            dict_tag = stem[stem.rfind('v'):]
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        # shallow=False: always compare the bytes. The default can report a
        # match from os.stat() data alone, without reading either file.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        print('OK     ')
218*3117ece4Schristos
219*3117ece4Schristos
if __name__ == '__main__':
    # Script entry point: build one zstd binary per release tag plus the
    # working tree, then check that each binary can decompress the files
    # present in the working directory. Paths are derived from the current
    # working directory, whose parent is taken as the repository root.
    base_dir = os.getcwd() + '/..'                  # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name         # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'              # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source  # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'           # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        # Release binaries are reused across runs; head is always rebuilt.
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                print('-----------------------------------------------')
                print('compiling ' + tag)
                print('-----------------------------------------------')
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                # Check the tag's files out into r_dir, using the clone as the repository.
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean'], False)   # separate 'clean' target to allow parallel build
                    make(['dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean'], False)  # separate 'clean' target to allow parallel build
                make(['zstd'], False)
            else:
                os.chdir(programs_dir)
                print('-----------------------------------------------')
                print('compiling head')
                print('-----------------------------------------------')
                make(['zstd'], False)
            # The current directory is the relevant programs/ directory here.
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            files = glob.glob(dict_glob, root_dir=base_dir)
            for file in files:
                file = os.path.join(base_dir, file)
                print("copying " + file + " to " + dict_source_path)
                shutil.copy(file, dict_source_path)

    print('-----------------------------------------------')
    print('Compress test.dat by all released zstd')
    print('-----------------------------------------------')

    # The head dictionary is created first because it is the fallback for
    # every tag whose own dictionary cannot be created.
    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        # Always true here: `tags` was already filtered to >= 'v0.5.0' above.
        if tag >= 'v0.5.0':
            create_dict(tag, dict_source_path, head)
            dict_compress_sample(tag, test_dat)
            remove_duplicates()
            decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    zstds = sorted(glob.glob('*.zst'))
    # Named zst_file rather than zstd, so the zstd() helper is not rebound.
    for zst_file in zstds:
        print(zst_file + ' : ' + repr(os.path.getsize(zst_file)) + ', ' + sha1_of_file(zst_file))
309