xref: /llvm-project/llvm/utils/docker/scripts/llvm_checksum/llvm_checksum.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
1515bc8c1Sserge-sans-paille#!/usr/bin/env python
28b62e088SIlya Biryukov""" A small program to compute checksums of LLVM checkout.
38b62e088SIlya Biryukov"""
48b62e088SIlya Biryukovfrom __future__ import absolute_import
58b62e088SIlya Biryukovfrom __future__ import division
68b62e088SIlya Biryukovfrom __future__ import print_function
78b62e088SIlya Biryukov
88b62e088SIlya Biryukovimport hashlib
98b62e088SIlya Biryukovimport logging
108b62e088SIlya Biryukovimport re
118b62e088SIlya Biryukovimport sys
128b62e088SIlya Biryukovfrom argparse import ArgumentParser
138b62e088SIlya Biryukovfrom project_tree import *
148b62e088SIlya Biryukov
# Matches an expanded svn keyword of the form "$Date...$" or
# "$LastChangedDate...$": the keyword name is captured as group 1 and must be
# followed by at least one non-"$" character before the closing "$".
SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$")
168b62e088SIlya Biryukov
178b62e088SIlya Biryukov
def main():
    """Command-line entry point: print or verify checksums of an LLVM checkout.

    Without --check, the computed checksums are written to stdout and the
    process exits with status 0. With --check, they are validated against the
    checksums read from the reference file; on a mismatch both sets are
    printed and the process exits with status 1.
    """
    cli = ArgumentParser()
    cli.add_argument(
        "-v", "--verbose", action="store_true", help="enable debug logging"
    )
    cli.add_argument(
        "-c",
        "--check",
        metavar="reference_file",
        help=(
            "read checksums from reference_file and "
            "check they match checksums of llvm_path."
        ),
    )
    cli.add_argument(
        "--partial",
        action="store_true",
        help=(
            "ignore projects from reference_file "
            "that are not checked out in llvm_path."
        ),
    )
    cli.add_argument(
        "--multi_dir",
        action="store_true",
        help=(
            "indicates llvm_path contains llvm, checked out "
            "into multiple directories, as opposed to a "
            "typical single source tree checkout."
        ),
    )
    cli.add_argument("llvm_path")
    options = cli.parse_args()

    # The reference file is loaded first, before the checkout is hashed.
    expected = None
    if options.check is not None:
        with open(options.check, "r") as reference_file:
            expected = ReadLLVMChecksums(reference_file)

    if options.verbose:
        logging.basicConfig(level=logging.DEBUG)

    projects = CreateLLVMProjects(not options.multi_dir)
    actual = ComputeLLVMChecksums(options.llvm_path, projects)

    # No reference given: just report what was computed.
    if expected is None:
        WriteLLVMChecksums(actual, sys.stdout)
        sys.exit(0)

    if ValidateChecksums(expected, actual, options.partial):
        sys.stdout.write("Checksums match.")
        return

    # Mismatch: show both sides so the difference can be inspected.
    sys.stdout.write("Checksums differ.\nNew checksums:\n")
    WriteLLVMChecksums(actual, sys.stdout)
    sys.stdout.write("Reference checksums:\n")
    WriteLLVMChecksums(expected, sys.stdout)
    sys.exit(1)
708b62e088SIlya Biryukov
718b62e088SIlya Biryukov
def ComputeLLVMChecksums(root_path, projects):
    """Compute checksums for LLVM sources checked out using svn.

    Every file that WalkProjectFiles reports for a project is hashed with
    SHA-256 after expanded svn $Date$ / $LastChangedDate$ keywords have been
    collapsed back to the bare keyword. The project checksum is the SHA-256
    over the (path relative to the project root, file digest) pairs, taken in
    order of file path.

    Note: earlier versions of this function replaced the keywords with the
    control character "\\x01" instead of the keyword name (a missing raw-string
    prefix), so checksums of files containing these keywords differ from the
    ones produced by those versions.

    Args:
      root_path: a directory of llvm checkout.
      projects: a list of LLVMProject instances, which describe checkout paths,
        relative to root_path.

    Returns:
      A dict mapping from project name to project checksum. Projects whose
      directory does not exist under root_path are skipped.
    """
    hash_algo = hashlib.sha256

    # File contents are read as bytes, and a str pattern cannot be applied to
    # bytes on Python 3, so derive a bytes version of the module-level pattern.
    dates_pattern = SVN_DATES_REGEX.pattern
    if not isinstance(dates_pattern, bytes):
        dates_pattern = dates_pattern.encode("ascii")
    svn_dates_regex = re.compile(dates_pattern)

    def to_bytes(value):
        # hashlib only accepts bytes; paths, link targets and hex digests are
        # text on Python 3 (on Python 2 str already is bytes).
        if isinstance(value, bytes):
            return value
        return value.encode("utf-8")

    def collapse_svn_substitutions(contents):
        # Replace svn substitutions for $Date$ and $LastChangedDate$.
        # Unfortunately, these are locale-specific.
        # The backslash is escaped so that "\1" reaches the regex engine as a
        # backreference to the keyword name, not as the character "\x01".
        return svn_dates_regex.sub(b"$\\1$", contents)

    def read_and_collapse_svn_subsitutions(file_path):
        with open(file_path, "rb") as f:
            contents = f.read()
        new_contents = collapse_svn_substitutions(contents)
        if contents != new_contents:
            logging.debug("Replaced svn keyword substitutions in %s", file_path)
            logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
        return new_contents

    project_checksums = dict()
    # Hash each project.
    for proj in projects:
        project_root = os.path.join(root_path, proj.relpath)
        if not os.path.exists(project_root):
            logging.info(
                "Folder %s doesn't exist, skipping project %s", proj.relpath, proj.name
            )
            continue

        files = list()

        def add_file_hash(file_path):
            if os.path.islink(file_path) and not os.path.exists(file_path):
                # Dangling symlink: there is nothing to read, so hash the
                # link target instead.
                content = to_bytes(os.readlink(file_path))
            else:
                content = read_and_collapse_svn_subsitutions(file_path)
            hasher = hash_algo()
            hasher.update(content)
            file_digest = hasher.hexdigest()
            logging.debug("Checksum %s for file %s", file_digest, file_path)
            files.append((file_path, file_digest))

        logging.info("Computing checksum for %s", proj.name)
        WalkProjectFiles(root_path, projects, proj, add_file_hash)

        # Compute final checksum. Sorting makes the result independent of the
        # order in which the files were visited.
        files.sort(key=lambda x: x[0])
        hasher = hash_algo()
        for file_path, file_digest in files:
            file_path = os.path.relpath(file_path, project_root)
            hasher.update(to_bytes(file_path))
            hasher.update(to_bytes(file_digest))
        project_checksums[proj.name] = hasher.hexdigest()
    return project_checksums
1348b62e088SIlya Biryukov
1358b62e088SIlya Biryukov
def WriteLLVMChecksums(checksums, f):
    """Writes checksums to a text file, one "<checksum> <project>" line each.

    Lines are emitted in sorted order of project name.

    Args:
      checksums: a dict mapping from project name to project checksum (result of
        ComputeLLVMChecksums).
      f: a file object to write into.
    """
    for project_name in sorted(checksums):
        digest = checksums[project_name]
        f.write("{} {}\n".format(digest, project_name))
1478b62e088SIlya Biryukov
1488b62e088SIlya Biryukov
def ReadLLVMChecksums(f):
    """Reads checksums from a text file, produced by WriteLLVMChecksums.

    Every non-blank line must consist of exactly two whitespace-separated
    fields, "<checksum> <project name>". Blank and whitespace-only lines
    (e.g. a trailing empty line in a hand-edited file) are skipped.

    Args:
      f: a text file object to read from; only its readline() is used.

    Returns:
      A dict, mapping from project name to project checksum.

    Raises:
      ValueError: if a non-blank line does not have exactly two fields.
    """
    checksums = {}
    # readline() returns "" only at end of file, which ends the iteration.
    for line_number, line in enumerate(iter(f.readline, ""), 1):
        fields = line.split()
        if not fields:
            continue
        if len(fields) != 2:
            raise ValueError(
                "line {}: expected '<checksum> <project>', got {!r}".format(
                    line_number, line
                )
            )
        checksum, proj = fields
        checksums[proj] = checksum
    return checksums
1638b62e088SIlya Biryukov
1648b62e088SIlya Biryukov
def ValidateChecksums(reference_checksums, new_checksums, allow_missing_projects=False):
    """Checks new_checksums against reference_checksums.

    Args:
      reference_checksums: a dict of reference checksums, mapping from a project
        name to a project checksum.
      new_checksums: a dict of checksums to be checked, mapping from a project
        name to a project checksum.
      allow_missing_projects:
        When True, projects present only in reference_checksums are ignored,
          i.e. new_checksums may cover a subset of the reference projects.
        When False, both dicts must cover exactly the same set of projects;
          a project that is in reference_checksums but missing from
          new_checksums makes the validation fail.

    Returns:
      True, if checksums match with regards to allow_missing_projects flag value.
      False, otherwise.
    """
    # In strict mode a differing project count already rules out a match.
    if not allow_missing_projects and len(new_checksums) != len(reference_checksums):
        return False

    # Every newly computed project must be known to the reference and carry
    # the same checksum there.
    return all(
        name in reference_checksums and not (reference_checksums[name] != digest)
        for name, digest in new_checksums.items()
    )
1988b62e088SIlya Biryukov
1998b62e088SIlya Biryukov
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
202