xref: /openbsd-src/gnu/llvm/llvm/utils/gn/build/sync_source_lists_from_cmake.py (revision d415bd752c734aee168c4ee86ff32e8cc249eb16)
1#!/usr/bin/env python3
2
3"""Helps to keep BUILD.gn files in sync with the corresponding CMakeLists.txt.
4
5For each BUILD.gn file in the tree, checks if the list of cpp files in
6it is identical to the list of cpp files in the corresponding CMakeLists.txt
7file, and prints the difference if not.
8
9Also checks that each CMakeLists.txt file below a unittests/ folder that
10defines binaries has a corresponding BUILD.gn file.
11
12If --write is passed, tries to write modified .gn files and adds one git
13commit for each cmake commit this merges. If an error is reported, the state
14of HEAD is unspecified; run `git reset --hard origin/main` if this happens.
15"""
16
17from collections import defaultdict
18import os
19import re
20import subprocess
21import sys
22
23
def patch_gn_file(gn_file, add, remove):
    """Apply source additions and removals to `gn_file`, then reformat it.

    When `add` is non-empty, the new entries are spliced in directly after
    the opening bracket of the file's `sources = [` list. Leading empty
    `sources = []` lists are skipped when locating that list; any further
    `sources = [` after the chosen one is rejected. Every `"name",` entry
    whose name is in `remove` is deleted from the file text. The result is
    written back and `gn.py format -q` (one directory above this script) is
    run on it.

    Raises ValueError if `add` is non-empty and the file has no usable source
    list, has more than one, or contains `# NOSORT` before the list. A failing
    format command raises subprocess.CalledProcessError.
    """
    open_tok = 'sources = ['
    with open(gn_file) as src:
        text = src.read()

    if add:
        pos = text.find(open_tok)
        # Step over empty lists; they are not the list we want to extend.
        while text.startswith('sources = []', pos):
            pos = text.find(open_tok, pos + 1)
        if pos == -1:
            raise ValueError(gn_file + ': No source list')
        if open_tok in text[pos + 1:]:
            raise ValueError(gn_file + ': Multiple source lists')
        if '# NOSORT' in text[:pos]:
            raise ValueError(gn_file + ': Found # NOSORT, needs manual merge')

        insert_at = pos + len(open_tok)
        entries = ['"%s",' % name for name in add]
        # Inserting one entry at a time at a fixed offset leaves the last
        # entry first, so join them in reverse to get the same text.
        text = text[:insert_at] + ''.join(entries[::-1]) + text[insert_at:]

    for name in remove:
        text = text.replace('"%s",' % name, '')

    with open(gn_file, 'w') as dst:
        dst.write(text)

    # Run `gn format`.
    gn_script = os.path.join(os.path.dirname(__file__), '..', 'gn.py')
    subprocess.check_call([sys.executable, gn_script, 'format', '-q', gn_file])
49
50
def sync_source_lists(write):
    """Diff the source lists of BUILD.gn files against their CMakeLists.txt.

    For every git-tracked `*BUILD.gn` whose path starts with
    `llvm/utils/gn/secondary/` and whose counterpart CMakeLists.txt exists,
    the source file names matched by the regexes below are compared. Each
    differing name is attributed to the commit that `git log -1 -S...` reports
    for it in the CMake file, and changes are grouped by that commit.

    Paths are used as printed by `git ls-files`, i.e. relative to the current
    working directory (presumably the repository root -- confirm with the
    caller).

    If `write` is true, each affected BUILD.gn is patched and `git add`ed, and
    one commit is created per originating revision. Otherwise the required
    additions and removals are only printed.

    Returns True if differences were found and `write` is false, else False.
    A failing git command raises subprocess.CalledProcessError; in write mode
    patch_gn_file may raise ValueError.
    """
    # Use shell=True on Windows in case git is a bat file.
    use_shell = os.name == 'nt'

    def git(args):
        subprocess.check_call(['git'] + args, shell=use_shell)

    def git_out(args):
        return subprocess.check_output(['git'] + args, shell=use_shell,
                                       universal_newlines=True)

    def read_text(path):
        # Use a context manager so the file handle is closed right away
        # instead of lingering until garbage collection.
        with open(path) as f:
            return f.read()

    def get_sources(source_re, text):
        return set(m.group(1) for m in source_re.finditer(text))

    gn_files = git_out(['ls-files', '*BUILD.gn']).splitlines()

    # Matches e.g. |   "foo.cpp",|, captures |foo.cpp| in group 1.
    gn_cpp_re = re.compile(r'^\s*"([^$"]+\.(?:cpp|c|h|S))",$', re.MULTILINE)
    # Matches e.g. |   bar_sources = [ "foo.cpp" ]|, captures |foo.cpp| in
    # group 1.
    gn_cpp_re2 = re.compile(
        r'^\s*(?:.*_)?sources \+?= \[ "([^$"]+\.(?:cpp|c|h|S))" ]$',
        re.MULTILINE)
    # Matches e.g. |   foo.cpp|, captures |foo.cpp| in group 1.
    cmake_cpp_re = re.compile(r'^\s*([A-Za-z_0-9./-]+\.(?:cpp|c|h|S))$',
                              re.MULTILINE)

    # rev -> gn_file -> 'add' / 'remove' -> list of source names.
    changes_by_rev = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    def posix_re_escape(s):
        # re.escape() escapes e.g. '-', which works in practice but has
        # undefined behavior according to the POSIX extended regex spec.
        return re.sub(r'([.[{()\\*+?|^$])', r'\\\1', s)

    def find_gitrev(touched_line, in_file):
        # Returns the abbreviated hash printed by git, or '' if git prints
        # nothing for this pickaxe query.
        cmd = ['log', '--format=%h', '-1', '--pickaxe-regex',
               r'-S\b%s\b' % posix_re_escape(touched_line), in_file]
        return git_out(cmd).rstrip()

    # The CMakeLists.txt for llvm/utils/gn/secondary/foo/BUILD.gn is
    # at foo/CMakeLists.txt.
    strip_prefix = 'llvm/utils/gn/secondary/'

    # Collect changes to gn files, grouped by revision.
    for gn_file in gn_files:
        if not gn_file.startswith(strip_prefix):
            continue
        cmake_file = os.path.join(
            os.path.dirname(gn_file[len(strip_prefix):]), 'CMakeLists.txt')
        if not os.path.exists(cmake_file):
            continue

        # Read each file once; both GN regexes scan the same text.
        gn_text = read_text(gn_file)
        gn_cpp = get_sources(gn_cpp_re, gn_text)
        gn_cpp |= get_sources(gn_cpp_re2, gn_text)
        cmake_cpp = get_sources(cmake_cpp_re, read_text(cmake_file))

        if gn_cpp == cmake_cpp:
            continue

        # Additions are looked up before removals, each in sorted order.
        for key, files in (('add', cmake_cpp - gn_cpp),
                           ('remove', gn_cpp - cmake_cpp)):
            for f in sorted(files):
                rev = find_gitrev(f, cmake_file)
                changes_by_rev[rev][gn_file][key].append(f)

    # Output necessary changes grouped by revision.
    for rev in sorted(changes_by_rev):
        print('[gn build] Port {0} -- https://reviews.llvm.org/rG{0}'
              .format(rev))
        for gn_file, data in sorted(changes_by_rev[rev].items()):
            # .get() avoids creating empty entries in the defaultdict.
            add = data.get('add', [])
            remove = data.get('remove', [])
            if write:
                patch_gn_file(gn_file, add, remove)
                git(['add', gn_file])
            else:
                print('  ' + gn_file)
                if add:
                    print('   add:\n' + '\n'.join('    "%s",' % a for a in add))
                if remove:
                    print('   remove:\n    ' + '\n    '.join(remove))
                print()
        if write:
            git(['commit', '-m', '[gn build] Port %s' % rev])
        else:
            print()

    return bool(changes_by_rev) and not write
130
131
def sync_unittests():
    """Report unittest CMakeLists.txt files that have no BUILD.gn file.

    Walks the `unittests` directory of each checked project, relative to the
    current working directory. For every CMakeLists.txt containing a line
    that starts with `add_..._unittest`, it expects a BUILD.gn at the same
    relative path below `llvm/utils/gn/secondary` and prints a message for
    each one that is missing.

    Returns True if at least one BUILD.gn file is missing, False otherwise.
    """
    # Matches e.g. |add_llvm_unittest_with_input_files|.
    unittest_re = re.compile(r'^add_\S+_unittest', re.MULTILINE)

    checked = ['bolt', 'clang', 'clang-tools-extra', 'lld', 'llvm']
    changed = False
    for c in checked:
        for root, _, _ in os.walk(os.path.join(c, 'unittests')):
            cmake_file = os.path.join(root, 'CMakeLists.txt')
            if not os.path.exists(cmake_file):
                continue
            # Read through a context manager so the handle is closed
            # immediately rather than leaked until garbage collection.
            with open(cmake_file) as f:
                cmake_contents = f.read()
            if not unittest_re.search(cmake_contents):
                continue  # Skip CMake files that just add subdirectories.
            gn_file = os.path.join('llvm/utils/gn/secondary', root, 'BUILD.gn')
            if not os.path.exists(gn_file):
                changed = True
                print('missing GN file %s for unittest CMake file %s' %
                      (gn_file, cmake_file))
    return changed
151
152
def main():
    """Run both sync checks and exit with status 1 if either reports a problem.

    Write mode is enabled only when the first command-line argument is
    exactly `--write`. Both checks always run, even if the first one already
    reported differences.
    """
    write_mode = len(sys.argv) > 1 and sys.argv[1] == '--write'
    sources_differ = sync_source_lists(write_mode)
    unittests_missing = sync_unittests()
    if not (sources_differ or unittests_missing):
        return
    sys.exit(1)
158
159
# Run the sync only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
162