#!/usr/bin/env python3

"""Helps to keep BUILD.gn files in sync with the corresponding CMakeLists.txt.

For each BUILD.gn file in the tree, checks if the list of cpp files in
it is identical to the list of cpp files in the corresponding CMakeLists.txt
file, and prints the difference if not.

Also checks that each CMakeLists.txt file below unittests/ folders that define
binaries have corresponding BUILD.gn files.

If --write is passed, tries to write modified .gn files and adds one git
commit for each cmake commit this merges. If an error is reported, the state
of HEAD is unspecified; run `git reset --hard origin/main` if this happens.
"""

from collections import defaultdict
import os
import re
import subprocess
import sys


def _read_file(path):
    """Return the full text of `path`, closing the file handle promptly."""
    with open(path) as f:
        return f.read()


def _get_sources(source_re, text):
    """Return the set of group-1 captures of every `source_re` match in `text`."""
    return {m.group(1) for m in source_re.finditer(text)}


def _posix_re_escape(s):
    """Backslash-escape the POSIX extended regex metacharacters in `s`.

    re.escape() escapes e.g. '-', which works in practice but has
    undefined behavior according to the POSIX extended regex spec.
    """
    return re.sub(r'([.[{()\\*+?|^$])', r'\\\1', s)


def patch_gn_file(gn_file, add, remove):
    """Rewrite `gn_file` in place, adding and removing source entries.

    Entries in `add` are inserted right after the opening `sources = [` of
    the file's single non-empty source list; every `"<name>",` occurrence of
    an entry in `remove` is deleted.  The file is then run through
    `gn format` (via ../gn.py relative to this script).

    Raises:
      ValueError: if entries need adding and the file has no non-empty source
        list, has more than one candidate list, or contains a `# NOSORT`
        marker before the list.
      subprocess.CalledProcessError: if `gn format` fails.
    """
    with open(gn_file) as f:
        gn_contents = f.read()
    if add:
        srcs_tok = 'sources = ['
        tokloc = gn_contents.find(srcs_tok)
        # Skip over empty `sources = []` lists; entries go into the first
        # list that already has contents.
        while gn_contents.startswith('sources = []', tokloc):
            tokloc = gn_contents.find(srcs_tok, tokloc + 1)
        if tokloc == -1:
            raise ValueError(gn_file + ': No source list')
        if gn_contents.find(srcs_tok, tokloc + 1) != -1:
            raise ValueError(gn_file + ': Multiple source lists')
        if gn_contents.find('# NOSORT', 0, tokloc) != -1:
            raise ValueError(gn_file + ': Found # NOSORT, needs manual merge')
        tokloc += len(srcs_tok)
        # Insertion order does not matter: the file is passed through
        # `gn format` below.
        for a in add:
            gn_contents = (gn_contents[:tokloc] + ('"%s",' % a) +
                           gn_contents[tokloc:])
    for r in remove:
        gn_contents = gn_contents.replace('"%s",' % r, '')
    with open(gn_file, 'w') as f:
        f.write(gn_contents)

    # Run `gn format`.
    gn = os.path.join(os.path.dirname(__file__), '..', 'gn.py')
    subprocess.check_call([sys.executable, gn, 'format', '-q', gn_file])


def sync_source_lists(write):
    """Compare the source lists of all BUILD.gn files with their CMake files.

    Differences are grouped by the cmake-side git revision that last touched
    the differing file name.  If `write` is false the differences are
    printed; if it is true the BUILD.gn files are patched and one git commit
    is created per revision.

    Must be run from the root of the checkout: paths returned by
    `git ls-files` and the derived CMakeLists.txt paths are used relative to
    the current directory.

    Returns:
      True if differences were found and not written, False otherwise.
    """
    # Use shell=True on Windows in case git is a bat file.
    def git(args):
        subprocess.check_call(['git'] + args, shell=os.name == 'nt')

    def git_out(args):
        return subprocess.check_output(['git'] + args, shell=os.name == 'nt',
                                       universal_newlines=True)

    gn_files = git_out(['ls-files', '*BUILD.gn']).splitlines()

    # Matches e.g. |   "foo.cpp",|, captures |foo.cpp| in group 1.
    gn_cpp_re = re.compile(r'^\s*"([^$"]+\.(?:cpp|c|h|S))",$', re.MULTILINE)
    # Matches e.g. |   bar_sources = [ "foo.cpp" ]|, captures |foo.cpp| in
    # group 1.
    gn_cpp_re2 = re.compile(
        r'^\s*(?:.*_)?sources \+?= \[ "([^$"]+\.(?:cpp|c|h|S))" ]$',
        re.MULTILINE)
    # Matches e.g. |   foo.cpp|, captures |foo.cpp| in group 1.
    cmake_cpp_re = re.compile(r'^\s*([A-Za-z_0-9./-]+\.(?:cpp|c|h|S))$',
                              re.MULTILINE)

    # Maps revision -> gn file -> 'add' / 'remove' -> list of file names.
    changes_by_rev = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    def find_gitrev(touched_line, in_file):
        # Find the most recent commit that changed the number of occurrences
        # of `touched_line` (as a whole word) in `in_file`.
        cmd = ['log', '--format=%h', '-1', '--pickaxe-regex',
               r'-S\b%s\b' % _posix_re_escape(touched_line), in_file]
        return git_out(cmd).rstrip()

    # Collect changes to gn files, grouped by revision.
    strip_prefix = 'llvm/utils/gn/secondary/'
    for gn_file in gn_files:
        # The CMakeLists.txt for llvm/utils/gn/secondary/foo/BUILD.gn is
        # at foo/CMakeLists.txt.
        if not gn_file.startswith(strip_prefix):
            continue
        cmake_file = os.path.join(
            os.path.dirname(gn_file[len(strip_prefix):]), 'CMakeLists.txt')
        if not os.path.exists(cmake_file):
            continue

        # Read each file exactly once and close it right away.
        gn_text = _read_file(gn_file)
        gn_cpp = _get_sources(gn_cpp_re, gn_text)
        gn_cpp |= _get_sources(gn_cpp_re2, gn_text)
        cmake_cpp = _get_sources(cmake_cpp_re, _read_file(cmake_file))

        if gn_cpp == cmake_cpp:
            continue

        for key, files in (('add', sorted(cmake_cpp - gn_cpp)),
                           ('remove', sorted(gn_cpp - cmake_cpp))):
            for f in files:
                rev = find_gitrev(f, cmake_file)
                changes_by_rev[rev][gn_file][key].append(f)

    # Output necessary changes grouped by revision.
    for rev in sorted(changes_by_rev):
        print('[gn build] Port {0} -- https://reviews.llvm.org/rG{0}'
              .format(rev))
        for gn_file, data in sorted(changes_by_rev[rev].items()):
            add = data.get('add', [])
            remove = data.get('remove', [])
            if write:
                patch_gn_file(gn_file, add, remove)
                git(['add', gn_file])
            else:
                print(' ' + gn_file)
                if add:
                    print(' add:\n' + '\n'.join(' "%s",' % a for a in add))
                if remove:
                    print(' remove:\n ' + '\n '.join(remove))
                print()
        if write:
            git(['commit', '-m', '[gn build] Port %s' % rev])
        else:
            print()

    return bool(changes_by_rev) and not write


def sync_unittests():
    """Report unittest CMake files that lack a corresponding BUILD.gn file.

    Walks the unittests/ directory of each checked project (relative to the
    current directory) and prints one line for every CMakeLists.txt that
    defines a unittest binary but has no BUILD.gn under
    llvm/utils/gn/secondary.

    Returns:
      True if at least one BUILD.gn file is missing, False otherwise.
    """
    # Matches e.g. |add_llvm_unittest_with_input_files|.
    unittest_re = re.compile(r'^add_\S+_unittest', re.MULTILINE)

    checked = ['bolt', 'clang', 'clang-tools-extra', 'lld', 'llvm']
    changed = False
    for c in checked:
        for root, _, _ in os.walk(os.path.join(c, 'unittests')):
            cmake_file = os.path.join(root, 'CMakeLists.txt')
            if not os.path.exists(cmake_file):
                continue
            if not unittest_re.search(_read_file(cmake_file)):
                continue  # Skip CMake files that just add subdirectories.
            gn_file = os.path.join('llvm/utils/gn/secondary', root, 'BUILD.gn')
            if not os.path.exists(gn_file):
                changed = True
                print('missing GN file %s for unittest CMake file %s' %
                      (gn_file, cmake_file))
    return changed


def main():
    """Run both checks; exit with status 1 if anything is out of sync."""
    src = sync_source_lists(len(sys.argv) > 1 and sys.argv[1] == '--write')
    tests = sync_unittests()
    if src or tests:
        sys.exit(1)


if __name__ == '__main__':
    main()