xref: /llvm-project/llvm/utils/docker/scripts/llvm_checksum/llvm_checksum.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
1#!/usr/bin/env python
2""" A small program to compute checksums of LLVM checkout.
3"""
4from __future__ import absolute_import
5from __future__ import division
6from __future__ import print_function
7
8import hashlib
9import logging
10import re
11import sys
12from argparse import ArgumentParser
13from project_tree import *
14
# Matches an expanded svn date keyword: "$Date" or "$LastChangedDate" followed
# by one or more non-"$" characters and a closing "$".  Group 1 captures the
# keyword name.  A bare "$Date$" (nothing between the keyword and the closing
# "$") does not match, because the character class requires at least one char.
SVN_DATES_REGEX = re.compile(r"\$(Date|LastChangedDate)[^\$]+\$")
16
17
def main():
    """Command-line entry point: compute or verify checksums of an LLVM checkout.

    Without --check, the computed checksums are written to stdout and the
    process exits with status 0.  With --check, they are compared against the
    checksums read from the reference file: on a mismatch both sets are
    printed and the process exits with status 1; on a match a confirmation
    line is printed and the function returns normally.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="enable debug logging"
    )
    parser.add_argument(
        "-c",
        "--check",
        metavar="reference_file",
        help="read checksums from reference_file and "
        "check they match checksums of llvm_path.",
    )
    parser.add_argument(
        "--partial",
        action="store_true",
        help="ignore projects from reference_file "
        "that are not checked out in llvm_path.",
    )
    parser.add_argument(
        "--multi_dir",
        action="store_true",
        help="indicates llvm_path contains llvm, checked out "
        "into multiple directories, as opposed to a "
        "typical single source tree checkout.",
    )
    parser.add_argument("llvm_path")
    args = parser.parse_args()

    # Load the reference checksums first so that an unreadable reference file
    # fails before any hashing work is done.
    if args.check is not None:
        with open(args.check, "r") as f:
            reference_checksums = ReadLLVMChecksums(f)
    else:
        reference_checksums = None

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)

    # CreateLLVMProjects takes a "single tree" flag, hence the negation.
    llvm_projects = CreateLLVMProjects(not args.multi_dir)
    checksums = ComputeLLVMChecksums(args.llvm_path, llvm_projects)

    # No reference file: just report what was computed.
    if reference_checksums is None:
        WriteLLVMChecksums(checksums, sys.stdout)
        sys.exit(0)

    if ValidateChecksums(reference_checksums, checksums, args.partial):
        # Terminate the line like every other message this tool prints, so the
        # output does not run into the shell prompt or the next log line.
        sys.stdout.write("Checksums match.\n")
        return

    sys.stdout.write("Checksums differ.\nNew checksums:\n")
    WriteLLVMChecksums(checksums, sys.stdout)
    sys.stdout.write("Reference checksums:\n")
    WriteLLVMChecksums(reference_checksums, sys.stdout)
    sys.exit(1)
70
71
def ComputeLLVMChecksums(root_path, projects):
    """Compute checksums for LLVM sources checked out using svn.

    Every file that WalkProjectFiles reports for a project is hashed with
    SHA-256 after collapsing expanded svn date keywords.  The project checksum
    is the SHA-256 over the (path relative to the project root, file digest)
    pairs, visited in order of file path.

    Args:
      root_path: a directory of llvm checkout.
      projects: a list of LLVMProject instances, which describe checkout paths,
        relative to root_path.

    Returns:
      A dict mapping from project name to project checksum.  Projects whose
      directory does not exist under root_path are skipped.
    """
    # No explicit `import os` is visible at file level; the name appears to
    # arrive only through the star import from project_tree.  Import it here so
    # this function does not depend on what that module happens to export.
    import os

    hash_algo = hashlib.sha256

    # Bytes counterpart of the module-level SVN_DATES_REGEX (keep the two
    # patterns in sync).  Files are read in binary mode, and on Python 3 a str
    # pattern cannot be applied to bytes.
    svn_dates_regex = re.compile(br"\$(Date|LastChangedDate)[^\$]+\$")

    # NOTE(review): the original replacement was written "$\1$" in a non-raw
    # string, which is the byte 0x01 between two dollar signs rather than a
    # backreference to the keyword name.  It is kept as-is so that digests stay
    # identical to those this function produced before; switch to br"$\1$" only
    # if regenerating existing reference checksums is acceptable.
    svn_date_placeholder = b"$\x01$"

    def to_bytes(value):
        # hashlib only accepts bytes-like input; on Python 3 paths, symlink
        # targets and hex digests are text and must be encoded first.
        if isinstance(value, bytes):
            return value
        return value.encode("utf-8")

    def read_and_collapse_svn_substitutions(file_path):
        # Replace svn substitutions for $Date$ and $LastChangedDate$.
        # Unfortunately, these are locale-specific.
        with open(file_path, "rb") as f:
            contents = f.read()
        new_contents = svn_dates_regex.sub(svn_date_placeholder, contents)
        if contents != new_contents:
            logging.debug("Replaced svn keyword substitutions in %s", file_path)
            logging.debug("\n\tBefore\n%s\n\tAfter\n%s", contents, new_contents)
        return new_contents

    project_checksums = {}
    # Hash each project.
    for proj in projects:
        project_root = os.path.join(root_path, proj.relpath)
        if not os.path.exists(project_root):
            logging.info(
                "Folder %s doesn't exist, skipping project %s", proj.relpath, proj.name
            )
            continue

        files = []

        def add_file_hash(file_path):
            # A dangling symlink cannot be opened, so hash its target instead.
            if os.path.islink(file_path) and not os.path.exists(file_path):
                content = to_bytes(os.readlink(file_path))
            else:
                content = read_and_collapse_svn_substitutions(file_path)
            hasher = hash_algo()
            hasher.update(content)
            file_digest = hasher.hexdigest()
            logging.debug("Checksum %s for file %s", file_digest, file_path)
            files.append((file_path, file_digest))

        logging.info("Computing checksum for %s", proj.name)
        WalkProjectFiles(root_path, projects, proj, add_file_hash)

        # Compute final checksum.  Sorting by path makes the result independent
        # of the order in which the walk reported the files.
        files.sort(key=lambda x: x[0])
        hasher = hash_algo()
        for file_path, file_digest in files:
            # Hash project-relative paths so the checksum does not depend on
            # where the checkout lives.
            file_path = os.path.relpath(file_path, project_root)
            hasher.update(to_bytes(file_path))
            hasher.update(to_bytes(file_digest))
        project_checksums[proj.name] = hasher.hexdigest()
    return project_checksums
134
135
def WriteLLVMChecksums(checksums, f):
    """Emit one "<checksum> <project>" line per project, ordered by project name.

    Args:
      checksums: a dict mapping from project name to project checksum (result
        of ComputeLLVMChecksums).
      f: a writable file object that receives the lines.
    """
    for project in sorted(checksums):
        entry = "{digest} {name}\n".format(digest=checksums[project], name=project)
        f.write(entry)
147
148
def ReadLLVMChecksums(f):
    """Reads checksums from a text file, produced by WriteLLVMChecksums.

    Each non-blank line must hold exactly two whitespace-separated fields,
    "<checksum> <project>".  Blank and whitespace-only lines (for example a
    trailing empty line) are ignored instead of aborting the whole read.

    Args:
      f: a file object opened for reading text; only readline() is used.

    Returns:
      A dict, mapping from project name to project checksum.

    Raises:
      ValueError: if a non-blank line does not consist of exactly two fields.
    """
    checksums = {}
    # iter(callable, sentinel) keeps calling readline() until it returns the
    # empty string, which is how file objects signal end of file.
    for line_number, line in enumerate(iter(f.readline, ""), 1):
        fields = line.split()
        if not fields:
            continue
        if len(fields) != 2:
            raise ValueError(
                "malformed checksum entry on line {}: {!r}".format(line_number, line)
            )
        checksum, proj = fields
        checksums[proj] = checksum
    return checksums
163
164
def ValidateChecksums(reference_checksums, new_checksums, allow_missing_projects=False):
    """Checks new_checksums against reference_checksums.

    Args:
      reference_checksums: a dict of reference checksums, mapping from a project
        name to a project checksum.
      new_checksums: a dict of checksums to be checked, mapping from a project
        name to a project checksum.
      allow_missing_projects:
        When True, projects that appear only in reference_checksums are
          ignored, so reference_checksums may be a superset of new_checksums.
        When False, both dicts must cover exactly the same set of projects; a
          project present in reference_checksums but absent from new_checksums
          makes the validation fail.

    Returns:
      True, if checksums match with regards to allow_missing_projects flag value.
      False, otherwise.
    """
    # In strict mode the dicts must be the same size; combined with the
    # per-project check below this means they cover the same projects.
    if not allow_missing_projects and len(new_checksums) != len(reference_checksums):
        return False

    # A project fails if the reference has no entry for it or the entry differs.
    has_mismatch = any(
        proj not in reference_checksums or reference_checksums[proj] != checksum
        for proj, checksum in new_checksums.items()
    )
    return not has_mismatch
198
199
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
202