#!/usr/bin/env python3
"""Test zstd interoperability between versions"""

# ################################################################
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under both the BSD-style license (found in the
# LICENSE file in the root directory of this source tree) and the GPLv2 (found
# in the COPYING file in the root directory of this source tree).
# You may select, at your option, one of the above-listed licenses.
# ################################################################

import filecmp
import glob
import hashlib
import os
import shutil
import sys
import subprocess
from subprocess import Popen, PIPE

repo_url = 'https://github.com/facebook/zstd.git'
tmp_dir_name = 'tests/versionsTest'
make_cmd = 'make'
make_args = ['-j','CFLAGS=-O0']
git_cmd = 'git'
test_dat_src = 'README.md'
test_dat = 'test_dat'
head = 'vdevel'
dict_source = 'dict_source'
dict_globs = [
    'programs/*.c',
    'lib/common/*.c',
    'lib/compress/*.c',
    'lib/decompress/*.c',
    'lib/dictBuilder/*.c',
    'lib/legacy/*.c',
    'programs/*.h',
    'lib/common/*.h',
    'lib/compress/*.h',
    'lib/dictBuilder/*.h',
    'lib/legacy/*.h'
]


def execute(command, print_output=False, print_error=True, param_shell=False):
    """Run `command` to completion and return its exit status.

    command      -- argument list, or a single string when `param_shell` is true
    print_output -- print the captured stdout and stderr unconditionally
    print_error  -- on a non-zero exit status, print the captured stderr
                    (skipped when `print_output` already printed it)
    param_shell  -- forwarded to Popen as `shell=`
    """
    popen = Popen(command, stdout=PIPE, stderr=PIPE, shell=param_shell)
    stdout_bytes, stderr_bytes = popen.communicate()
    # Child output is not guaranteed to be valid UTF-8; substitute undecodable
    # bytes rather than aborting with UnicodeDecodeError before the exit
    # status can be reported.
    stderr_text = stderr_bytes.decode("utf-8", errors="replace")
    stdout_text = stdout_bytes.decode("utf-8", errors="replace")
    if print_output:
        print(stdout_text)
        print(stderr_text)
    elif print_error and popen.returncode != 0:
        print(stderr_text)
    return popen.returncode


def proc(cmd_args, pipe=True, dummy=False):
    """Run `cmd_args` and return the `(stdout, stderr)` pair from communicate().

    With `pipe` true both streams are captured as bytes; otherwise the child
    inherits this process's streams and both tuple members are None.
    With `dummy` true nothing is executed and None is returned.
    """
    if dummy:
        return
    if pipe:
        subproc = Popen(cmd_args, stdout=PIPE, stderr=PIPE)
    else:
        subproc = Popen(cmd_args)
    return subproc.communicate()


def make(targets, pipe=True):
    """Invoke `make_cmd` with `make_args` followed by `targets` (a list)."""
    cmd = [make_cmd] + make_args + targets
    print('compilation command : ' + str(cmd))
    return proc(cmd, pipe)


def git(args, pipe=True):
    """Invoke `git_cmd` with the argument list `args`."""
    return proc([git_cmd] + args, pipe)


def get_git_tags():
    """Return the tags matching 'v[0-9].[0-9].[0-9]' in the current repository."""
    stdout, stderr = git(['tag', '-l', 'v[0-9].[0-9].[0-9]'])
    tags = stdout.decode('utf-8').split()
    return tags

def dict_ok(tag, dict_name, sample):
    """Return True if `./zstd.<tag>` can compress `sample` using `dict_name`.

    Returns False when the dictionary file is missing, when the sample or the
    binary cannot be opened/executed, or when the command exits non-zero.
    """
    if not os.path.isfile(dict_name):
        return False
    cmd = ['./zstd.' + tag, '-D', dict_name]
    try:
        with open(sample, "rb") as i:
            subprocess.check_call(cmd, stdin=i, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except (OSError, subprocess.SubprocessError):
        # Only "the check failed" conditions are swallowed; KeyboardInterrupt
        # and SystemExit must still propagate.
        return False
    return True


def create_dict(tag, dict_source_path, fallback_tag=None):
    """Create `dict.<tag>` in the current directory unless it already exists.

    The dictionary is trained on every *.c and *.h file found directly in
    `dict_source_path`. If training fails, or the result does not pass
    dict_ok(), `dict.<fallback_tag>` is copied in its place when a fallback
    tag is given; otherwise RuntimeError is raised.
    """
    dict_name = 'dict.' + tag
    if os.path.isfile(dict_name):
        print(dict_name + ' already exists')
        return

    cFiles = glob.glob(dict_source_path + "/*.c")
    hFiles = glob.glob(dict_source_path + "/*.h")
    # Ensure the dictionary builder is deterministic
    files = sorted(cFiles + hFiles)
    # Pass an argument vector instead of a shell string so that paths
    # containing spaces or shell metacharacters reach the tool intact.
    if tag == 'v0.5.0':
        cmd = ['./dictBuilder.' + tag] + files + ['-o', dict_name]
    else:
        cmd = ['./zstd.' + tag, '-f', '--train'] + files + ['-o', dict_name]
    result = execute(cmd, print_output=False)

    if result == 0 and dict_ok(tag, dict_name, files[0]):
        print(dict_name + ' created')
    elif fallback_tag is not None:
        fallback_dict_name = 'dict.' + fallback_tag
        print('creating dictionary ' + dict_name + ' failed, falling back to ' + fallback_dict_name)
        shutil.copy(fallback_dict_name, dict_name)
    else:
        raise RuntimeError('ERROR: creating of ' + dict_name + ' failed')


def zstd(tag, args, input_file, output_file):
    """
    Zstd compress input_file to output_file.
    Need this helper because 0.5.0 is broken when stdout is not a TTY.
    Throws an exception if the command returns non-zero.
    """
    with open(input_file, "rb") as i:
        with open(output_file, "wb") as o:
            cmd = ['./zstd.' + tag] + args
            print("Running: '{}', input={}, output={}" .format(
                ' '.join(cmd), input_file, output_file
            ))
            result = subprocess.run(cmd, stdin=i, stdout=o, stderr=subprocess.PIPE)
            # Diagnostics may contain non-ASCII bytes; never let decoding them
            # raise before the exit status has been checked.
            print("Stderr: {}".format(result.stderr.decode("utf-8", errors="replace")))
            result.check_returncode()


# Compression levels exercised for every tag, with and without a dictionary.
_SAMPLE_LEVELS = (1, 3, 5, 9, 15, 18)


def dict_compress_sample(tag, sample):
    """Compress `sample` with `dict.<tag>` at each level in _SAMPLE_LEVELS.

    Output files are named `<sample>_<LL>_64_<tag>_dictio.zst`, where LL is
    the zero-padded level. Only the level-1 run is made verbose.
    """
    dict_name = 'dict.' + tag
    verbose = ['-v', '-v', '-v']
    for level in _SAMPLE_LEVELS:
        args = ['-D', dict_name, '-{}'.format(level)]
        if level == 1:
            args += verbose
        zstd(tag, args, sample, '{}_{:02d}_64_{}_dictio.zst'.format(sample, level, tag))
    print(tag + " : dict compression completed")


def compress_sample(tag, sample):
    """Compress `sample` without a dictionary at each level in _SAMPLE_LEVELS.

    Output files are named `<sample>_<LL>_64_<tag>_nodict.zst`.
    """
    for level in _SAMPLE_LEVELS:
        zstd(tag, ['-{}'.format(level)], sample, '{}_{:02d}_64_{}_nodict.zst'.format(sample, level, tag))
    print(tag + " : compression completed")


# https://stackoverflow.com/a/19711609/2132223
def sha1_of_file(filepath):
    """Return the hexadecimal SHA-1 digest of the file at `filepath`."""
    with open(filepath, 'rb') as f:
        return hashlib.sha1(f.read()).hexdigest()

def remove_duplicates():
    """Delete every *.zst file in the current directory whose content is
    identical to an earlier one (in sorted name order)."""
    list_of_zst = sorted(glob.glob('*.zst'))
    for i, ref_zst in enumerate(list_of_zst):
        if not os.path.isfile(ref_zst):
            continue  # already removed as a duplicate of an earlier file
        for compared_zst in list_of_zst[i + 1:]:
            if not os.path.isfile(compared_zst):
                continue
            # shallow=False forces a byte-for-byte comparison; the default
            # treats files with equal size and mtime as equal without reading.
            if filecmp.cmp(ref_zst, compared_zst, shallow=False):
                os.remove(compared_zst)
                print('duplicated : {} == {}'.format(ref_zst, compared_zst))


def decompress_zst(tag):
    """Decompress every *_nodict.zst file with `./zstd.<tag>` and check that
    the output matches `test_dat`.

    Raises RuntimeError on the first mismatch.
    """
    list_zst = sorted(glob.glob('*_nodict.zst'))
    for file_zst in list_zst:
        print(file_zst + ' ' + tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-d'], file_zst, file_dec)
        # Compare content, not just stat signatures.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        else:
            print('OK     ')


def decompress_dict(tag):
    """Decompress every *_dictio.zst file with `./zstd.<tag>`, using the
    dictionary of the version that produced it, and check that the output
    matches `test_dat`.

    Raises RuntimeError on the first mismatch.
    """
    suffix = '_dictio.zst'
    list_zst = sorted(glob.glob('*' + suffix))
    for file_zst in list_zst:
        dict_tag = file_zst[:-len(suffix)]
        if head in dict_tag:  # find vdevel
            dict_tag = head
        else:
            dict_tag = dict_tag[dict_tag.rfind('v'):]
        # v0.6.0 is not asked to decode frames produced by older versions.
        if tag == 'v0.6.0' and dict_tag < 'v0.6.0':
            continue
        dict_name = 'dict.' + dict_tag
        print(file_zst + ' ' + tag + ' dict=' + dict_tag)
        file_dec = file_zst + '_d64_' + tag + '.dec'
        zstd(tag, ['-D', dict_name, '-d'], file_zst, file_dec)
        # Compare content, not just stat signatures.
        if not filecmp.cmp(file_dec, test_dat, shallow=False):
            raise RuntimeError('Decompression failed: tag={} file={}'.format(tag, file_zst))
        else:
            print('OK     ')


if __name__ == '__main__':
    base_dir = os.getcwd() + '/..'                  # /path/to/zstd
    tmp_dir = base_dir + '/' + tmp_dir_name         # /path/to/zstd/tests/versionsTest
    clone_dir = tmp_dir + '/' + 'zstd'              # /path/to/zstd/tests/versionsTest/zstd
    dict_source_path = tmp_dir + '/' + dict_source  # /path/to/zstd/tests/versionsTest/dict_source
    programs_dir = base_dir + '/programs'           # /path/to/zstd/programs
    os.makedirs(tmp_dir, exist_ok=True)

    # since Travis clones limited depth, we should clone full repository
    if not os.path.isdir(clone_dir):
        git(['clone', repo_url, clone_dir])

    shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat)

    # Retrieve all release tags
    print('Retrieve all release tags :')
    os.chdir(clone_dir)
    alltags = get_git_tags() + [head]
    tags = [t for t in alltags if t >= 'v0.5.0']
    print(tags)

    # Build all release zstd
    for tag in tags:
        os.chdir(base_dir)
        dst_zstd = '{}/zstd.{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/zstd.<TAG>
        if not os.path.isfile(dst_zstd) or tag == head:
            if tag != head:
                print('-----------------------------------------------')
                print('compiling ' + tag)
                print('-----------------------------------------------')
                r_dir = '{}/{}'.format(tmp_dir, tag)  # /path/to/zstd/tests/versionsTest/<TAG>
                os.makedirs(r_dir, exist_ok=True)
                os.chdir(clone_dir)
                git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False)
                if tag == 'v0.5.0':
                    os.chdir(r_dir + '/dictBuilder')  # /path/to/zstd/tests/versionsTest/v0.5.0/dictBuilder
                    make(['clean'], False)  # separate 'clean' target to allow parallel build
                    make(['dictBuilder'], False)
                    shutil.copy2('dictBuilder', '{}/dictBuilder.{}'.format(tmp_dir, tag))
                os.chdir(r_dir + '/programs')  # /path/to/zstd/tests/versionsTest/<TAG>/programs
                make(['clean'], False)  # separate 'clean' target to allow parallel build
                make(['zstd'], False)
            else:
                os.chdir(programs_dir)
                print('-----------------------------------------------')
                print('compiling head')
                print('-----------------------------------------------')
                make(['zstd'], False)
            shutil.copy2('zstd', dst_zstd)

    # remove any remaining *.zst and *.dec from previous test
    os.chdir(tmp_dir)
    for compressed in glob.glob("*.zst"):
        os.remove(compressed)
    for dec in glob.glob("*.dec"):
        os.remove(dec)

    # copy *.c and *.h to a temporary directory ("dict_source")
    if not os.path.isdir(dict_source_path):
        os.mkdir(dict_source_path)
        for dict_glob in dict_globs:
            files = glob.glob(dict_glob, root_dir=base_dir)
            for file in files:
                file = os.path.join(base_dir, file)
                print("copying " + file + " to " + dict_source_path)
                shutil.copy(file, dict_source_path)

    print('-----------------------------------------------')
    print('Compress test.dat by all released zstd')
    print('-----------------------------------------------')

    create_dict(head, dict_source_path)
    for tag in tags:
        print(tag)
        if tag >= 'v0.5.0':
            create_dict(tag, dict_source_path, head)
            dict_compress_sample(tag, test_dat)
            remove_duplicates()
            decompress_dict(tag)
        compress_sample(tag, test_dat)
        remove_duplicates()
        decompress_zst(tag)

    print('')
    print('Enumerate different compressed files')
    # The loop variable is deliberately not called `zstd`: that would rebind
    # the module-level zstd() helper.
    for zst_file in sorted(glob.glob('*.zst')):
        print(zst_file + ' : ' + repr(os.path.getsize(zst_file)) + ', ' + sha1_of_file(zst_file))