1#!/usr/bin/env python 2""" A small program to compute checksums of LLVM checkout. 3""" 4from __future__ import absolute_import 5from __future__ import division 6from __future__ import print_function 7 8import hashlib 9import logging 10import re 11import sys 12from argparse import ArgumentParser 13from project_tree import * 14 15SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$") 16 17 18def main(): 19 parser = ArgumentParser() 20 parser.add_argument( 21 "-v", "--verbose", action="store_true", help="enable debug logging" 22 ) 23 parser.add_argument( 24 "-c", 25 "--check", 26 metavar="reference_file", 27 help="read checksums from reference_file and " 28 + "check they match checksums of llvm_path.", 29 ) 30 parser.add_argument( 31 "--partial", 32 action="store_true", 33 help="ignore projects from reference_file " 34 + "that are not checked out in llvm_path.", 35 ) 36 parser.add_argument( 37 "--multi_dir", 38 action="store_true", 39 help="indicates llvm_path contains llvm, checked out " 40 + "into multiple directories, as opposed to a " 41 + "typical single source tree checkout.", 42 ) 43 parser.add_argument("llvm_path") 44 45 args = parser.parse_args() 46 if args.check is not None: 47 with open(args.check, "r") as f: 48 reference_checksums = ReadLLVMChecksums(f) 49 else: 50 reference_checksums = None 51 52 if args.verbose: 53 logging.basicConfig(level=logging.DEBUG) 54 55 llvm_projects = CreateLLVMProjects(not args.multi_dir) 56 checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects) 57 58 if reference_checksums is None: 59 WriteLLVMChecksums(checksums, sys.stdout) 60 sys.exit(0) 61 62 if not ValidateChecksums(reference_checksums, checksums, args.partial): 63 sys.stdout.write("Checksums differ.\nNew checksums:\n") 64 WriteLLVMChecksums(checksums, sys.stdout) 65 sys.stdout.write("Reference checksums:\n") 66 WriteLLVMChecksums(reference_checksums, sys.stdout) 67 sys.exit(1) 68 else: 69 sys.stdout.write("Checksums match.") 70 71 72def ComputeLLVMChecksums(root_path, projects): 73 """Compute checksums for LLVM sources checked out using svn. 74 75 Args: 76 root_path: a directory of llvm checkout. 77 projects: a list of LLVMProject instances, which describe checkout paths, 78 relative to root_path. 79 80 Returns: 81 A dict mapping from project name to project checksum. 82 """ 83 hash_algo = hashlib.sha256 84 85 def collapse_svn_substitutions(contents): 86 # Replace svn substitutions for $Date$ and $LastChangedDate$. 87 # Unfortunately, these are locale-specific. 88 return SVN_DATES_REGEX.sub("$\1$", contents) 89 90 def read_and_collapse_svn_subsitutions(file_path): 91 with open(file_path, "rb") as f: 92 contents = f.read() 93 new_contents = collapse_svn_substitutions(contents) 94 if contents != new_contents: 95 logging.debug("Replaced svn keyword substitutions in %s", file_path) 96 logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents) 97 return new_contents 98 99 project_checksums = dict() 100 # Hash each project. 101 for proj in projects: 102 project_root = os.path.join(root_path, proj.relpath) 103 if not os.path.exists(project_root): 104 logging.info( 105 "Folder %s doesn't exist, skipping project %s", proj.relpath, proj.name 106 ) 107 continue 108 109 files = list() 110 111 def add_file_hash(file_path): 112 if os.path.islink(file_path) and not os.path.exists(file_path): 113 content = os.readlink(file_path) 114 else: 115 content = read_and_collapse_svn_subsitutions(file_path) 116 hasher = hash_algo() 117 hasher.update(content) 118 file_digest = hasher.hexdigest() 119 logging.debug("Checksum %s for file %s", file_digest, file_path) 120 files.append((file_path, file_digest)) 121 122 logging.info("Computing checksum for %s", proj.name) 123 WalkProjectFiles(root_path, projects, proj, add_file_hash) 124 125 # Compute final checksum. 126 files.sort(key=lambda x: x[0]) 127 hasher = hash_algo() 128 for file_path, file_digest in files: 129 file_path = os.path.relpath(file_path, project_root) 130 hasher.update(file_path) 131 hasher.update(file_digest) 132 project_checksums[proj.name] = hasher.hexdigest() 133 return project_checksums 134 135 136def WriteLLVMChecksums(checksums, f): 137 """Writes checksums to a text file. 138 139 Args: 140 checksums: a dict mapping from project name to project checksum (result of 141 ComputeLLVMChecksums). 142 f: a file object to write into. 143 """ 144 145 for proj in sorted(checksums.keys()): 146 f.write("{} {}\n".format(checksums[proj], proj)) 147 148 149def ReadLLVMChecksums(f): 150 """Reads checksums from a text file, produced by WriteLLVMChecksums. 151 152 Returns: 153 A dict, mapping from project name to project checksum. 154 """ 155 checksums = {} 156 while True: 157 line = f.readline() 158 if line == "": 159 break 160 checksum, proj = line.split() 161 checksums[proj] = checksum 162 return checksums 163 164 165def ValidateChecksums(reference_checksums, new_checksums, allow_missing_projects=False): 166 """Validates that reference_checksums and new_checksums match. 167 168 Args: 169 reference_checksums: a dict of reference checksums, mapping from a project 170 name to a project checksum. 171 new_checksums: a dict of checksums to be checked, mapping from a project 172 name to a project checksum. 173 allow_missing_projects: 174 When True, reference_checksums may contain more projects than 175 new_checksums. Projects missing from new_checksums are ignored. 176 When False, new_checksums and reference_checksums must contain checksums 177 for the same set of projects. If there is a project in 178 reference_checksums, missing from new_checksums, ValidateChecksums 179 will return False. 180 181 Returns: 182 True, if checksums match with regards to allow_missing_projects flag value. 183 False, otherwise. 184 """ 185 if not allow_missing_projects: 186 if len(new_checksums) != len(reference_checksums): 187 return False 188 189 for proj, checksum in new_checksums.items(): 190 # We never computed a checksum for this project. 191 if proj not in reference_checksums: 192 return False 193 # Checksum did not match. 194 if reference_checksums[proj] != checksum: 195 return False 196 197 return True 198 199 200if __name__ == "__main__": 201 main() 202