# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.

This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
mechanisms provided by the dynamic linker. The related library is implemented
in C language and can be found under 'libear' directory.

The 'libear' library is capturing all child process creation and logging the
relevant information about it into separate files in a specified directory.
The parameter of this process is the output directory name, where the report
files shall be placed. This parameter is passed as an environment variable.

The module also implements compiler wrappers to intercept the compiler calls.

The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """

import sys
import os
import os.path
import re
import itertools
import json
import glob
import logging
from libear import build_libear, TemporaryDirectory
from libscanbuild import command_entry_point, compiler_wrapper, \
    wrapper_environment, run_command, run_build
from libscanbuild import duplicate_check
from libscanbuild.compilation import split_command
from libscanbuild.arguments import parse_args_for_intercept_build
from libscanbuild.shell import encode, decode

__all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper']

# Separators of the execution trace file format: GS terminates a record,
# RS separates the fields of a record, US terminates each command argument.
GS = chr(0x1d)
RS = chr(0x1e)
US = chr(0x1f)

COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})


@command_entry_point
def intercept_build():
    """ Entry point for 'intercept-build' command. """

    args = parse_args_for_intercept_build()
    return capture(args)


def capture(args):
    """ The entry point of build command interception.

    Runs the build with the interception environment, collects the execution
    traces written into a temporary directory and dumps the resulting
    compilation database into 'args.cdb'.

    :param args:    the parsed command line arguments; 'build' and 'cdb' are
                    read, 'append' is honoured when present,
    :return:        the value returned by 'run_build' for the build command.
    """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both; entries whose source file
        # does not exist any more are dropped as well
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; glob already yields paths which
        # include the directory part of the pattern, so they are used as is
        trace_pattern = os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(trace_file)
            for trace_file in sorted(glob.iglob(trace_pattern)))
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database; the lazy 'entries' generator has to
        # be consumed here, while the temporary directory still exists
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code


def setup_environment(args, destination):
    """ Sets up the environment for the build command.

    It only prepares the environment variables; the build command itself is
    executed by the caller. The exec calls will be logged by the 'libear'
    preloaded library or by the 'wrapper' programs.

    :param args:        the parsed command line arguments; 'override_compiler'
                        is read, 'cc' is honoured when present,
    :param destination: directory where the execution reports shall be
                        written (also used as output directory for the
                        'libear' build),
    :return:            a copy of 'os.environ' extended with the variables
                        needed by the selected interception method. """

    c_compiler = args.cc if 'cc' in args else 'cc'

    # the preload library is only built when it is going to be used
    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment.update({
            'CC': COMPILER_WRAPPER_CC,
            'CXX': COMPILER_WRAPPER_CXX
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment
@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`. """

    return compiler_wrapper(intercept_compiler_wrapper_impl)


def intercept_compiler_wrapper_impl(_, execution):
    """ Implement intercept compiler wrapper functionality.

    It does generate execution report into target directory.
    The target directory name is from environment variables.

    :param _:           unused (first argument passed by 'compiler_wrapper'),
    :param execution:   the execution to report; handed to
                        'write_exec_trace' unchanged. """

    message_prefix = 'execution report might be incomplete: %s'

    target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if not target_dir:
        logging.warning(message_prefix, 'missing target directory')
        return
    # write current execution info to the pid file
    try:
        target_file_name = str(os.getpid()) + TRACE_FILE_EXTENSION
        target_file = os.path.join(target_dir, target_file_name)
        logging.debug('writing execution report to: %s', target_file)
        write_exec_trace(target_file, execution)
    except IOError:
        # reporting is best effort: the failure is only logged
        logging.warning(message_prefix, 'io problem')


def write_exec_trace(filename, entry):
    """ Write execution report file.

    This method shall be sync with the execution report writer in interception
    library. A record consists of five fields separated by RS and is
    terminated by GS: pid, parent pid (the pid is written here too), the
    literal 'wrapper', the working directory and the command. Each argument
    of the command is terminated by US.

    :param filename:    path to the output execution trace file,
    :param entry:       the Execution object to append to that file. """

    with open(filename, 'ab') as handler:
        pid = str(entry.pid)
        command = US.join(entry.cmd) + US
        content = RS.join([pid, pid, 'wrapper', entry.cwd, command]) + GS
        handler.write(content.encode('utf-8'))


def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename:    path to the execution trace file,
    :return:            generator of dictionaries with the 'pid', 'ppid',
                        'function', 'directory' and 'command' keys. """

    logging.debug('parse exec trace file: %s', filename)
    # NOTE(review): the file is read with the platform default encoding,
    # while 'write_exec_trace' emits UTF-8 -- confirm these always agree.
    with open(filename, 'r') as handler:
        content = handler.read()
        # empty groups (e.g. the one after the last GS) are skipped
        for group in filter(bool, content.split(GS)):
            records = group.split(RS)
            yield {
                'pid': records[0],
                'ppid': records[1],
                'function': records[2],
                'directory': records[3],
                # every argument is US terminated, so the last element of
                # the split is dropped
                'command': records[4].split(US)[:-1]
            }


def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace:  dictionary as produced by 'parse_exec_trace'; the
                        'command' and 'directory' keys are read,
    :return:            generator of compilation database entries, one per
                        source file of the command; yields nothing when
                        'split_command' returns a false value. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        fullname = name if os.path.isabs(name) else os.path.join(cwd, name)
        return os.path.normpath(fullname)

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if compilation:
        # the compiler name does not depend on the source file
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        for source in compilation.files:
            command = [compiler, '-c'] + compilation.flags + [source]
            logging.debug('formated as: %s', command)
            yield {
                'directory': exec_trace['directory'],
                'command': encode(command),
                'file': abspath(exec_trace['directory'], source)
            }


def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    Platforms listed in WRAPPER_ONLY_PLATFORMS are always reported as
    preload disabled.

    :param platform:    name of the platform (returned by sys.platform),
    :return:            True if library preload will fail by the dynamic linker.
    """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    elif platform == 'darwin':
        command = ['csrutil', 'status']
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        try:
            return any(pattern.match(line) for line in run_command(command))
        except Exception:
            # Best effort: when the status can not be queried the preload is
            # assumed to work. KeyboardInterrupt and SystemExit are no longer
            # swallowed here.
            logging.debug('could not query SIP status', exc_info=True)
            return False
    else:
        return False


def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry:   compilation database entry; the 'file', 'directory' and
                    'command' keys are read,
    :return:        string which identifies the entry. """

    # For faster lookup in set filename is reverted
    filename = entry['file'][::-1]
    # For faster lookup in set directory is reverted
    directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    command = ' '.join(decode(entry['command'])[1:])

    return '<>'.join([filename, directory, command])