1515bc8c1Sserge-sans-paille#!/usr/bin/env python 28b62e088SIlya Biryukov""" A small program to compute checksums of LLVM checkout. 38b62e088SIlya Biryukov""" 48b62e088SIlya Biryukovfrom __future__ import absolute_import 58b62e088SIlya Biryukovfrom __future__ import division 68b62e088SIlya Biryukovfrom __future__ import print_function 78b62e088SIlya Biryukov 88b62e088SIlya Biryukovimport hashlib 98b62e088SIlya Biryukovimport logging 108b62e088SIlya Biryukovimport re 118b62e088SIlya Biryukovimport sys 128b62e088SIlya Biryukovfrom argparse import ArgumentParser 138b62e088SIlya Biryukovfrom project_tree import * 148b62e088SIlya Biryukov 158b62e088SIlya BiryukovSVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$") 168b62e088SIlya Biryukov 178b62e088SIlya Biryukov 188b62e088SIlya Biryukovdef main(): 198b62e088SIlya Biryukov parser = ArgumentParser() 208b62e088SIlya Biryukov parser.add_argument( 21*b71edfaaSTobias Hieta "-v", "--verbose", action="store_true", help="enable debug logging" 22*b71edfaaSTobias Hieta ) 238b62e088SIlya Biryukov parser.add_argument( 248b62e088SIlya Biryukov "-c", 258b62e088SIlya Biryukov "--check", 268b62e088SIlya Biryukov metavar="reference_file", 27*b71edfaaSTobias Hieta help="read checksums from reference_file and " 28*b71edfaaSTobias Hieta + "check they match checksums of llvm_path.", 29*b71edfaaSTobias Hieta ) 308b62e088SIlya Biryukov parser.add_argument( 318b62e088SIlya Biryukov "--partial", 328b62e088SIlya Biryukov action="store_true", 33*b71edfaaSTobias Hieta help="ignore projects from reference_file " 34*b71edfaaSTobias Hieta + "that are not checked out in llvm_path.", 35*b71edfaaSTobias Hieta ) 368b62e088SIlya Biryukov parser.add_argument( 378b62e088SIlya Biryukov "--multi_dir", 388b62e088SIlya Biryukov action="store_true", 39*b71edfaaSTobias Hieta help="indicates llvm_path contains llvm, checked out " 40*b71edfaaSTobias Hieta + "into multiple directories, as opposed to a " 41*b71edfaaSTobias Hieta + "typical single source tree checkout.", 42*b71edfaaSTobias Hieta ) 438b62e088SIlya Biryukov parser.add_argument("llvm_path") 448b62e088SIlya Biryukov 458b62e088SIlya Biryukov args = parser.parse_args() 468b62e088SIlya Biryukov if args.check is not None: 478b62e088SIlya Biryukov with open(args.check, "r") as f: 488b62e088SIlya Biryukov reference_checksums = ReadLLVMChecksums(f) 498b62e088SIlya Biryukov else: 508b62e088SIlya Biryukov reference_checksums = None 518b62e088SIlya Biryukov 528b62e088SIlya Biryukov if args.verbose: 538b62e088SIlya Biryukov logging.basicConfig(level=logging.DEBUG) 548b62e088SIlya Biryukov 558b62e088SIlya Biryukov llvm_projects = CreateLLVMProjects(not args.multi_dir) 568b62e088SIlya Biryukov checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects) 578b62e088SIlya Biryukov 588b62e088SIlya Biryukov if reference_checksums is None: 598b62e088SIlya Biryukov WriteLLVMChecksums(checksums, sys.stdout) 608b62e088SIlya Biryukov sys.exit(0) 618b62e088SIlya Biryukov 628b62e088SIlya Biryukov if not ValidateChecksums(reference_checksums, checksums, args.partial): 638b62e088SIlya Biryukov sys.stdout.write("Checksums differ.\nNew checksums:\n") 648b62e088SIlya Biryukov WriteLLVMChecksums(checksums, sys.stdout) 658b62e088SIlya Biryukov sys.stdout.write("Reference checksums:\n") 668b62e088SIlya Biryukov WriteLLVMChecksums(reference_checksums, sys.stdout) 678b62e088SIlya Biryukov sys.exit(1) 688b62e088SIlya Biryukov else: 698b62e088SIlya Biryukov sys.stdout.write("Checksums match.") 708b62e088SIlya Biryukov 718b62e088SIlya Biryukov 728b62e088SIlya Biryukovdef ComputeLLVMChecksums(root_path, projects): 738b62e088SIlya Biryukov """Compute checksums for LLVM sources checked out using svn. 748b62e088SIlya Biryukov 758b62e088SIlya Biryukov Args: 768b62e088SIlya Biryukov root_path: a directory of llvm checkout. 778b62e088SIlya Biryukov projects: a list of LLVMProject instances, which describe checkout paths, 788b62e088SIlya Biryukov relative to root_path. 798b62e088SIlya Biryukov 808b62e088SIlya Biryukov Returns: 818b62e088SIlya Biryukov A dict mapping from project name to project checksum. 828b62e088SIlya Biryukov """ 838b62e088SIlya Biryukov hash_algo = hashlib.sha256 848b62e088SIlya Biryukov 858b62e088SIlya Biryukov def collapse_svn_substitutions(contents): 868b62e088SIlya Biryukov # Replace svn substitutions for $Date$ and $LastChangedDate$. 878b62e088SIlya Biryukov # Unfortunately, these are locale-specific. 888b62e088SIlya Biryukov return SVN_DATES_REGEX.sub("$\1$", contents) 898b62e088SIlya Biryukov 908b62e088SIlya Biryukov def read_and_collapse_svn_subsitutions(file_path): 918b62e088SIlya Biryukov with open(file_path, "rb") as f: 928b62e088SIlya Biryukov contents = f.read() 938b62e088SIlya Biryukov new_contents = collapse_svn_substitutions(contents) 948b62e088SIlya Biryukov if contents != new_contents: 958b62e088SIlya Biryukov logging.debug("Replaced svn keyword substitutions in %s", file_path) 968b62e088SIlya Biryukov logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents) 978b62e088SIlya Biryukov return new_contents 988b62e088SIlya Biryukov 998b62e088SIlya Biryukov project_checksums = dict() 1008b62e088SIlya Biryukov # Hash each project. 1018b62e088SIlya Biryukov for proj in projects: 1028b62e088SIlya Biryukov project_root = os.path.join(root_path, proj.relpath) 1038b62e088SIlya Biryukov if not os.path.exists(project_root): 104*b71edfaaSTobias Hieta logging.info( 105*b71edfaaSTobias Hieta "Folder %s doesn't exist, skipping project %s", proj.relpath, proj.name 106*b71edfaaSTobias Hieta ) 1078b62e088SIlya Biryukov continue 1088b62e088SIlya Biryukov 1098b62e088SIlya Biryukov files = list() 1108b62e088SIlya Biryukov 1118b62e088SIlya Biryukov def add_file_hash(file_path): 1128b62e088SIlya Biryukov if os.path.islink(file_path) and not os.path.exists(file_path): 1138b62e088SIlya Biryukov content = os.readlink(file_path) 1148b62e088SIlya Biryukov else: 1158b62e088SIlya Biryukov content = read_and_collapse_svn_subsitutions(file_path) 1168b62e088SIlya Biryukov hasher = hash_algo() 1178b62e088SIlya Biryukov hasher.update(content) 1188b62e088SIlya Biryukov file_digest = hasher.hexdigest() 1198b62e088SIlya Biryukov logging.debug("Checksum %s for file %s", file_digest, file_path) 1208b62e088SIlya Biryukov files.append((file_path, file_digest)) 1218b62e088SIlya Biryukov 1228b62e088SIlya Biryukov logging.info("Computing checksum for %s", proj.name) 1238b62e088SIlya Biryukov WalkProjectFiles(root_path, projects, proj, add_file_hash) 1248b62e088SIlya Biryukov 1258b62e088SIlya Biryukov # Compute final checksum. 1268b62e088SIlya Biryukov files.sort(key=lambda x: x[0]) 1278b62e088SIlya Biryukov hasher = hash_algo() 1288b62e088SIlya Biryukov for file_path, file_digest in files: 1298b62e088SIlya Biryukov file_path = os.path.relpath(file_path, project_root) 1308b62e088SIlya Biryukov hasher.update(file_path) 1318b62e088SIlya Biryukov hasher.update(file_digest) 1328b62e088SIlya Biryukov project_checksums[proj.name] = hasher.hexdigest() 1338b62e088SIlya Biryukov return project_checksums 1348b62e088SIlya Biryukov 1358b62e088SIlya Biryukov 1368b62e088SIlya Biryukovdef WriteLLVMChecksums(checksums, f): 1378b62e088SIlya Biryukov """Writes checksums to a text file. 1388b62e088SIlya Biryukov 1398b62e088SIlya Biryukov Args: 1408b62e088SIlya Biryukov checksums: a dict mapping from project name to project checksum (result of 1418b62e088SIlya Biryukov ComputeLLVMChecksums). 1428b62e088SIlya Biryukov f: a file object to write into. 1438b62e088SIlya Biryukov """ 1448b62e088SIlya Biryukov 1458b62e088SIlya Biryukov for proj in sorted(checksums.keys()): 1468b62e088SIlya Biryukov f.write("{} {}\n".format(checksums[proj], proj)) 1478b62e088SIlya Biryukov 1488b62e088SIlya Biryukov 1498b62e088SIlya Biryukovdef ReadLLVMChecksums(f): 1508b62e088SIlya Biryukov """Reads checksums from a text file, produced by WriteLLVMChecksums. 1518b62e088SIlya Biryukov 1528b62e088SIlya Biryukov Returns: 1538b62e088SIlya Biryukov A dict, mapping from project name to project checksum. 1548b62e088SIlya Biryukov """ 1558b62e088SIlya Biryukov checksums = {} 1568b62e088SIlya Biryukov while True: 1578b62e088SIlya Biryukov line = f.readline() 1588b62e088SIlya Biryukov if line == "": 1598b62e088SIlya Biryukov break 1608b62e088SIlya Biryukov checksum, proj = line.split() 1618b62e088SIlya Biryukov checksums[proj] = checksum 1628b62e088SIlya Biryukov return checksums 1638b62e088SIlya Biryukov 1648b62e088SIlya Biryukov 165*b71edfaaSTobias Hietadef ValidateChecksums(reference_checksums, new_checksums, allow_missing_projects=False): 1668b62e088SIlya Biryukov """Validates that reference_checksums and new_checksums match. 1678b62e088SIlya Biryukov 1688b62e088SIlya Biryukov Args: 1698b62e088SIlya Biryukov reference_checksums: a dict of reference checksums, mapping from a project 1708b62e088SIlya Biryukov name to a project checksum. 1718b62e088SIlya Biryukov new_checksums: a dict of checksums to be checked, mapping from a project 1728b62e088SIlya Biryukov name to a project checksum. 1738b62e088SIlya Biryukov allow_missing_projects: 1748b62e088SIlya Biryukov When True, reference_checksums may contain more projects than 1758b62e088SIlya Biryukov new_checksums. Projects missing from new_checksums are ignored. 1768b62e088SIlya Biryukov When False, new_checksums and reference_checksums must contain checksums 1778b62e088SIlya Biryukov for the same set of projects. If there is a project in 1788b62e088SIlya Biryukov reference_checksums, missing from new_checksums, ValidateChecksums 1798b62e088SIlya Biryukov will return False. 1808b62e088SIlya Biryukov 1818b62e088SIlya Biryukov Returns: 1828b62e088SIlya Biryukov True, if checksums match with regards to allow_missing_projects flag value. 1838b62e088SIlya Biryukov False, otherwise. 1848b62e088SIlya Biryukov """ 1858b62e088SIlya Biryukov if not allow_missing_projects: 1868b62e088SIlya Biryukov if len(new_checksums) != len(reference_checksums): 1878b62e088SIlya Biryukov return False 1888b62e088SIlya Biryukov 189873cba17SSerge Guelton for proj, checksum in new_checksums.items(): 1908b62e088SIlya Biryukov # We never computed a checksum for this project. 1918b62e088SIlya Biryukov if proj not in reference_checksums: 1928b62e088SIlya Biryukov return False 1938b62e088SIlya Biryukov # Checksum did not match. 1948b62e088SIlya Biryukov if reference_checksums[proj] != checksum: 1958b62e088SIlya Biryukov return False 1968b62e088SIlya Biryukov 1978b62e088SIlya Biryukov return True 1988b62e088SIlya Biryukov 1998b62e088SIlya Biryukov 2008b62e088SIlya Biryukovif __name__ == "__main__": 2018b62e088SIlya Biryukov main() 202