xref: /llvm-project/llvm/utils/gn/build/sync_source_lists_from_cmake.py (revision 82811a8f50f7a8a2afab3207f51670a7d9b57d4d)
1#!/usr/bin/env python3
2
3"""Helps to keep BUILD.gn files in sync with the corresponding CMakeLists.txt.
4
5For each BUILD.gn file in the tree, checks if the list of cpp files in
6it is identical to the list of cpp files in the corresponding CMakeLists.txt
7file, and prints the difference if not.
8
9Also checks that each CMakeLists.txt file below unittests/ folders that defines
10binaries has a corresponding BUILD.gn file.
11
12If --write is passed, tries to write modified .gn files and adds one git
13commit for each cmake commit this merges. If an error is reported, the state
14of HEAD is unspecified; run `git reset --hard origin/main` if this happens.
15"""
16
17from collections import defaultdict
18import os
19import re
20import subprocess
21import sys
22
23
def patch_gn_file(gn_file, add, remove):
    """Rewrite `gn_file` in place, adding and removing quoted source entries.

    Entries in `add` are inserted right after the opening bracket of the
    file's single non-empty `sources = [` list; every `"<name>",` occurrence
    for a name in `remove` is deleted. The file is then written back and
    `gn format` is run on it via the gn.py wrapper one directory above this
    script.

    Raises:
        ValueError: if entries must be added but the file has no usable
            source list, has more than one, or contains `# NOSORT` before
            the list (which requires a manual merge).
        subprocess.CalledProcessError: if the `gn format` invocation fails.
    """
    sources_marker = "sources = ["
    with open(gn_file) as src:
        text = src.read()

    if add:
        pos = text.find(sources_marker)
        # Explicitly empty lists (`sources = []`) are not insertion targets.
        while text.startswith("sources = []", pos):
            pos = text.find(sources_marker, pos + 1)
        if pos == -1:
            raise ValueError(gn_file + ": No source list")
        if sources_marker in text[pos + 1 :]:
            raise ValueError(gn_file + ": Multiple source lists")
        if "# NOSORT" in text[:pos]:
            raise ValueError(gn_file + ": Found # NOSORT, needs manual merge")

        insert_at = pos + len(sources_marker)
        # Each entry lands directly after the bracket, so the final order is
        # the reverse of `add`; `gn format` re-sorts the list afterwards.
        new_entries = ['"%s",' % name for name in add]
        new_entries.reverse()
        text = text[:insert_at] + "".join(new_entries) + text[insert_at:]

    for name in remove:
        text = text.replace('"%s",' % name, "")

    with open(gn_file, "w") as dst:
        dst.write(text)

    # Run `gn format`.
    gn_script = os.path.join(os.path.dirname(__file__), "..", "gn.py")
    subprocess.check_call([sys.executable, gn_script, "format", "-q", gn_file])
49
50
def sync_source_lists(write):
    """Compare source lists in BUILD.gn files with their CMakeLists.txt.

    For every tracked `*BUILD.gn` below `llvm/utils/gn/secondary/` whose
    corresponding `CMakeLists.txt` exists, the set of source files named in
    each is compared. Differences are attributed to the most recent git
    revision of the CMake file that touched the file name, and are then
    either printed or applied, grouped by revision.

    Args:
        write: if true, patch the BUILD.gn files, `git add` them and create
            one commit per revision instead of printing the differences.

    Returns:
        True if differences were found and `write` is false, else False.

    Raises:
        subprocess.CalledProcessError: if a git (or `gn format`) command
            fails.
        ValueError: propagated from patch_gn_file() when a BUILD.gn file
            cannot be patched automatically.
    """
    # Use shell=True on Windows in case git is a bat file.
    def git(args):
        subprocess.check_call(["git"] + args, shell=os.name == "nt")

    def git_out(args):
        return subprocess.check_output(
            ["git"] + args, shell=os.name == "nt", universal_newlines=True
        )

    def read_file(path):
        # Close the handle deterministically rather than leaving it to the
        # garbage collector.
        with open(path) as f:
            return f.read()

    def get_sources(source_re, text):
        return {m.group(1) for m in source_re.finditer(text)}

    gn_files = git_out(["ls-files", "*BUILD.gn"]).splitlines()

    # Matches e.g. |   "foo.cpp",|, captures |foo.cpp| in group 1.
    gn_cpp_re = re.compile(r'^\s*"([^$"]+\.(?:cpp|c|h|S))",$', re.MULTILINE)
    # Matches e.g. |   bar_sources = [ "foo.cpp" ]|, captures |foo.cpp| in
    # group 1.
    gn_cpp_re2 = re.compile(
        r'^\s*(?:.*_)?sources \+?= \[ "([^$"]+\.(?:cpp|c|h|S))" ]$', re.MULTILINE
    )
    # Matches e.g. |   foo.cpp|, captures |foo.cpp| in group 1.
    cmake_cpp_re = re.compile(r"^\s*([A-Za-z_0-9./-]+\.(?:cpp|c|h|S))$", re.MULTILINE)

    # rev -> gn_file -> "add"/"remove" -> list of file names.
    changes_by_rev = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    def posix_re_escape(s):
        # re.escape() escapes e.g. '-', which works in practice but has
        # undefined behavior according to the POSIX extended regex spec.
        return re.sub(r"([.[{()\\*+?|^$])", r"\\\1", s)

    def find_gitrev(touched_line, in_file):
        cmd = [
            "log",
            "--format=%h",
            "-1",
            "--pickaxe-regex",
            # `\<` / `\>` cause issues on Windows (and is a GNU extension).
            # `\b` is a GNU extension and stopped working in Apple Git-143
            # (Xcode 13.3).
            # `[:space:]` is over 10x faster than `^[:alnum:]` and hopefully
            # good enough.
            r"-S[[:space:]]%s[[:space:]]" % posix_re_escape(touched_line),
            in_file,
        ]
        return git_out(cmd).rstrip()

    # Collect changes to gn files, grouped by revision.
    # The CMakeLists.txt for llvm/utils/gn/secondary/foo/BUILD.gn is
    # at foo/CMakeLists.txt.
    strip_prefix = "llvm/utils/gn/secondary/"
    for gn_file in gn_files:
        if not gn_file.startswith(strip_prefix):
            continue
        cmake_file = os.path.join(
            os.path.dirname(gn_file[len(strip_prefix) :]), "CMakeLists.txt"
        )
        if not os.path.exists(cmake_file):
            continue

        # Read the gn file once and run both patterns over the same text.
        gn_text = read_file(gn_file)
        gn_cpp = get_sources(gn_cpp_re, gn_text) | get_sources(gn_cpp_re2, gn_text)
        cmake_cpp = get_sources(cmake_cpp_re, read_file(cmake_file))

        if gn_cpp == cmake_cpp:
            continue

        pending = (
            ("add", sorted(cmake_cpp - gn_cpp)),
            ("remove", sorted(gn_cpp - cmake_cpp)),
        )
        for key, files in pending:
            for f in files:
                rev = find_gitrev(f, cmake_file)
                changes_by_rev[rev][gn_file][key].append(f)

    # Output necessary changes grouped by revision.
    commit_url = "https://github.com/llvm/llvm-project/commit"
    for rev in sorted(changes_by_rev):
        print("[gn build] Port {0} -- {1}/{0}".format(rev, commit_url))
        for gn_file, data in sorted(changes_by_rev[rev].items()):
            # .get() avoids creating empty entries in the defaultdict.
            add = data.get("add", [])
            remove = data.get("remove", [])
            if write:
                patch_gn_file(gn_file, add, remove)
                git(["add", gn_file])
            else:
                print("  " + gn_file)
                if add:
                    print("   add:\n" + "\n".join('    "%s",' % a for a in add))
                if remove:
                    print("   remove:\n    " + "\n    ".join(remove))
                print()
        if write:
            git(["commit", "-m", "[gn build] Port %s" % rev])
        else:
            print()

    return bool(changes_by_rev) and not write
147
148
def sync_unittests():
    """Report unittest CMakeLists.txt files that have no BUILD.gn file.

    Walks the `unittests` directory of each checked project, relative to the
    current working directory. For every CMakeLists.txt that contains an
    `add_*_unittest` call, checks that a BUILD.gn exists at the mirrored path
    below `llvm/utils/gn/secondary` and prints a message for each one that
    is missing.

    Returns:
        True if at least one BUILD.gn file is missing, False otherwise.
    """
    # Matches e.g. |add_llvm_unittest_with_input_files|.
    unittest_re = re.compile(r"^add_\S+_unittest", re.MULTILINE)

    checked = ["bolt", "clang", "clang-tools-extra", "lld", "llvm"]
    changed = False
    for c in checked:
        for root, _, _ in os.walk(os.path.join(c, "unittests")):
            cmake_file = os.path.join(root, "CMakeLists.txt")
            if not os.path.exists(cmake_file):
                continue
            # Use a context manager so the handle is closed right away
            # instead of being leaked until garbage collection.
            with open(cmake_file) as f:
                cmake_contents = f.read()
            if not unittest_re.search(cmake_contents):
                continue  # Skip CMake files that just add subdirectories.
            gn_file = os.path.join("llvm/utils/gn/secondary", root, "BUILD.gn")
            if not os.path.exists(gn_file):
                changed = True
                print(
                    "missing GN file %s for unittest CMake file %s"
                    % (gn_file, cmake_file)
                )
    return changed
170
171
def main():
    """Run both sync checks and exit with status 1 if either reports a problem.

    Write mode is enabled only when the first command-line argument is
    exactly `--write`.
    """
    write_mode = sys.argv[1:2] == ["--write"]
    sources_differ = sync_source_lists(write_mode)
    unittests_missing = sync_unittests()
    # Both checks always run before deciding the exit status.
    if sources_differ or unittests_missing:
        sys.exit(1)
177
178
# Only run the sync when executed as a script, not when imported.
if __name__ == "__main__":
    main()
181