# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """

import re
import os
import collections

__all__ = ['split_command', 'classify_source', 'compiler_language']

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only parameters
# which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of compilation
    # database will explicitly set it.
    '-c': 0,
    # dependency generation flags, ignored because they would cause duplicate
    # entries in the output (the only difference would be these flags). this
    # is an actual finding from users, who suffered longer execution time
    # caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset([
    re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
    re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^llvm-g(cc|\+\+)$'),
])

# Executable names containing '++' (optionally followed by a '-suffix')
# are treated as C++ compilers. Compiled once instead of on every call.
_CPLUSPLUS_PATTERN = re.compile(r'^(.+)(\+\+)(-.+|)$')

# Flags which mean that the command is not a compilation pass.
_NON_COMPILATION_FLAGS = frozenset(['-E', '-S', '-cc1', '-M', '-MM', '-###'])

# Flags whose value is passed as the next argument and could be mistaken
# for a source file name.
_FLAGS_WITH_VALUE = frozenset(['-D', '-I'])

# The value returned by `split_command` on success.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The command is a list of strings, where the first element is the
    executable and the rest are its arguments.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when a flag shows that no compilation pass is involved, or when no
    source file was found among the arguments.

    A command which ends right after a flag that expects a value (for
    example a trailing '-MF' or '-I') is handled on a best effort basis
    instead of raising StopIteration. """

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in _NON_COMPILATION_FLAGS:
            return None
        # ignore some flags (and their values)
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # default value keeps a truncated command from raising
                next(args, None)
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in _FLAGS_WITH_VALUE:
            flags.append(arg)
            value = next(args, None)
            # the value is missing when the flag was the last argument
            if value is not None:
                flags.append(value)
        # parameter which looks like a source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=compiler, flags=flags, files=files)


def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    The c_compiler flag decides how the ambiguous '.c' and '.i' extensions
    are classified: as C when it is true, as C++ otherwise.

    Returns None when the extension is not a known source extension. """

    mapping = {
        '.c': 'c' if c_compiler else 'c++',
        '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    Only the base name of the first element of the command is checked
    against the known compiler name patterns.

    Returns 'c' or 'c++' when it matches. None otherwise (also for an
    empty command). """

    if command:
        executable = os.path.basename(command[0])
        if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
            return 'c++' if _CPLUSPLUS_PATTERN.match(executable) else 'c'
    return None