# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.

This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
mechanisms provided by the dynamic linker. The related library is implemented
in C language and can be found under 'libear' directory.

The 'libear' library is capturing all child process creation and logging the
relevant information about it into separate files in a specified directory.
The parameter of this process is the output directory name, where the report
files shall be placed. This parameter is passed as an environment variable.

The module also implements compiler wrappers to intercept the compiler calls.

The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """

import sys
import os
import os.path
import re
import itertools
import json
import glob
import logging
from libear import build_libear, TemporaryDirectory
from libscanbuild import (
    command_entry_point,
    compiler_wrapper,
    wrapper_environment,
    run_command,
    run_build,
)
from libscanbuild import duplicate_check
from libscanbuild.compilation import split_command
from libscanbuild.arguments import parse_args_for_intercept_build
from libscanbuild.shell import encode, decode

__all__ = ["capture", "intercept_build", "intercept_compiler_wrapper"]

# Separators of the execution trace file format: GS terminates one execution
# record, RS separates the fields of a record, US terminates each argument
# of the command field.
GS = chr(0x1D)
RS = chr(0x1E)
US = chr(0x1F)

COMPILER_WRAPPER_CC = "intercept-cc"
COMPILER_WRAPPER_CXX = "intercept-c++"
TRACE_FILE_EXTENSION = ".cmd"  # same as in ear.c
WRAPPER_ONLY_PLATFORMS = frozenset({"win32", "cygwin"})

# Number of RS separated fields in one execution record:
# pid, ppid, function, directory, command.
TRACE_RECORD_FIELDS = 5


@command_entry_point
def intercept_build():
    """Entry point for 'intercept-build' command."""

    args = parse_args_for_intercept_build()
    return capture(args)


def capture(args):
    """The entry point of build command interception.

    Runs the build with the interception environment, collects the execution
    traces written into a temporary directory and writes the resulting
    compilation database to 'args.cdb'.

    :param args: the parsed command line arguments; 'build' and 'cdb' are
                 read here, 'append' is read when present,
    :return: the value returned by 'run_build' for the build command."""

    def post_processing(commands):
        """To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements.

        :param commands: iterable of execution traces (dictionaries as
                         produced by 'parse_exec_trace'),
        :return: generator of compilation database entries."""

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command)
            for command in commands
        )
        # read entries from previous run (loaded eagerly, before the output
        # file gets rewritten)
        if "append" in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (
            entry
            for entry in itertools.chain(previous, current)
            if os.path.exists(entry["file"]) and not duplicate(entry)
        )

    with TemporaryDirectory(prefix="intercept-") as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; glob already yields paths which
        # include 'tmp_dir', so they must not be joined with it again
        # (that would double the prefix when 'tmp_dir' is a relative path)
        trace_pattern = os.path.join(tmp_dir, "*" + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(trace_file)
            for trace_file in sorted(glob.iglob(trace_pattern))
        )
        # do post processing; materialize the entries before the output file
        # is opened, so a failure here does not leave a truncated database
        entries = list(post_processing(exec_traces))
        # dump the compilation database
        with open(args.cdb, "w") as handle:
            json.dump(entries, handle, sort_keys=True, indent=4)
        return exit_code


def setup_environment(args, destination):
    """Sets up the environment for the build command.

    It prepares the required environment variables; the build command itself
    is executed by the caller. The exec calls will be logged by the 'libear'
    preloaded library or by the 'wrapper' programs.

    :param args: the parsed command line arguments; 'override_compiler' is
                 read here, 'cc' is read when present,
    :param destination: directory where the execution reports shall be
                        placed (also used as build directory of 'libear'),
    :return: a copy of the current process environment extended with the
             variables needed by the selected interception method."""

    c_compiler = args.cc if "cc" in args else "cc"

    # the preload library is built only when it is going to be used
    libear_path = (
        None
        if args.override_compiler or is_preload_disabled(sys.platform)
        else build_libear(c_compiler, destination)
    )

    environment = dict(os.environ)
    environment.update({"INTERCEPT_BUILD_TARGET_DIR": destination})

    if not libear_path:
        logging.debug("intercept gonna use compiler wrappers")
        environment.update(wrapper_environment(args))
        environment.update({"CC": COMPILER_WRAPPER_CC, "CXX": COMPILER_WRAPPER_CXX})
    elif sys.platform == "darwin":
        logging.debug("intercept gonna preload libear on OSX")
        environment.update(
            {"DYLD_INSERT_LIBRARIES": libear_path, "DYLD_FORCE_FLAT_NAMESPACE": "1"}
        )
    else:
        logging.debug("intercept gonna preload libear on UNIX")
        environment.update({"LD_PRELOAD": libear_path})

    return environment


@command_entry_point
def intercept_compiler_wrapper():
    """Entry point for `intercept-cc` and `intercept-c++`."""

    return compiler_wrapper(intercept_compiler_wrapper_impl)


def intercept_compiler_wrapper_impl(_, execution):
    """Implement intercept compiler wrapper functionality.

    It does generate execution report into target directory.
    The target directory name is from environment variables.

    Problems are only logged as warnings, they are never raised.

    :param _: unused (first argument of the wrapper callback),
    :param execution: the execution to report; it is passed on to
                      'write_exec_trace', which reads 'pid', 'cwd' and 'cmd',
    :return: None."""

    message_prefix = "execution report might be incomplete: %s"

    target_dir = os.getenv("INTERCEPT_BUILD_TARGET_DIR")
    if not target_dir:
        logging.warning(message_prefix, "missing target directory")
        return
    # write current execution info to the pid file
    try:
        target_file_name = str(os.getpid()) + TRACE_FILE_EXTENSION
        target_file = os.path.join(target_dir, target_file_name)
        logging.debug("writing execution report to: %s", target_file)
        write_exec_trace(target_file, execution)
    except IOError:
        logging.warning(message_prefix, "io problem")


def write_exec_trace(filename, entry):
    """Write execution report file.

    This method shall be sync with the execution report writer in interception
    library. An entry in the file is a sequence of fields (pid, ppid,
    function, directory, command) separated by RS and closed by GS; every
    argument of the command is closed by US. The wrapper has no parent pid
    information, so the pid is written into the ppid field too, and the
    function field is the literal "wrapper".

    :param filename: path to the output execution trace file,
    :param entry: the Execution object to append to that file."""

    with open(filename, "ab") as handler:
        pid = str(entry.pid)
        command = US.join(entry.cmd) + US
        content = RS.join([pid, pid, "wrapper", entry.cwd, command]) + GS
        handler.write(content.encode("utf-8"))


def parse_exec_trace(filename):
    """Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    Records which have fewer fields than expected (e.g. a report which was
    only partially written) are skipped with a warning.

    :param filename: path to the execution trace file,
    :return: generator of dictionaries with the keys 'pid', 'ppid',
             'function', 'directory' (all strings) and 'command' (list of
             strings)."""

    logging.debug("parse exec trace file: %s", filename)
    # read the whole file first, so the handle is not kept open while the
    # generator is suspended
    with open(filename, "r") as handler:
        content = handler.read()
    for group in filter(bool, content.split(GS)):
        records = group.split(RS)
        if len(records) < TRACE_RECORD_FIELDS:
            logging.warning("skipping malformed exec trace in: %s", filename)
            continue
        yield {
            "pid": records[0],
            "ppid": records[1],
            "function": records[2],
            "directory": records[3],
            # every argument is closed by US, drop the empty tail
            "command": records[4].split(US)[:-1],
        }


def format_entry(exec_trace):
    """Generate the desired fields for compilation database entries.

    :param exec_trace: dictionary as produced by 'parse_exec_trace'; the
                       'command' and 'directory' keys are read,
    :return: generator of entries (dictionaries with 'directory', 'command'
             and 'file' keys), one per source file of the command; nothing
             is generated when 'split_command' returns a false value."""

    def abspath(cwd, name):
        """Create normalized absolute path from input filename."""
        fullname = name if os.path.isabs(name) else os.path.join(cwd, name)
        return os.path.normpath(fullname)

    logging.debug("format this command: %s", exec_trace["command"])
    compilation = split_command(exec_trace["command"])
    if compilation:
        for source in compilation.files:
            # the compiler name is normalized to either 'c++' or 'cc'
            compiler = "c++" if compilation.compiler == "c++" else "cc"
            command = [compiler, "-c"] + compilation.flags + [source]
            logging.debug("formated as: %s", command)
            yield {
                "directory": exec_trace["directory"],
                "command": encode(command),
                "file": abspath(exec_trace["directory"], source),
            }


def is_preload_disabled(platform):
    """Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker."""

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    elif platform == "darwin":
        command = ["csrutil", "status"]
        pattern = re.compile(r"System Integrity Protection status:\s+enabled")
        try:
            return any(pattern.match(line) for line in run_command(command))
        except Exception:
            # best effort: if the status can't be queried, preload is
            # assumed to work. (KeyboardInterrupt and SystemExit are not
            # swallowed here.)
            return False
    else:
        return False


def entry_hash(entry):
    """Implement unique hash method for compilation database entries.

    :param entry: compilation database entry; the 'file', 'directory' and
                  'command' keys are read,
    :return: a string built from those fields, ignoring the first word of
             the command."""

    # For faster lookup in set filename is reversed
    filename = entry["file"][::-1]
    # For faster lookup in set directory is reversed
    directory = entry["directory"][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    command = " ".join(decode(entry["command"])[1:])

    return "<>".join([filename, directory, command])