# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """

import re
import os
import collections

__all__ = ['split_command', 'classify_source', 'compiler_language']

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function. (Which ignores and
# classifies parameters.) Please note that these are not the only parameters
# which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of compilation
    # database will explicitly set it.
    '-c': 0,
    # preprocessor dependency-generation flags, ignored because they would
    # cause duplicate entries in the output (the only difference would be
    # these flags). this is an actual finding from users, who suffered longer
    # execution time caused by the duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset([
    re.compile(r'^(intercept-|analyze-|)c(c|\+\+)$'),
    re.compile(r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$'),
    re.compile(r'^llvm-g(cc|\+\+)$'),
])

# The value type returned by `split_command`. Defined once, so every result
# is a real named tuple instance rather than a freshly created class.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])

# Options which mean that the invocation is not a plain compilation.
_NON_COMPILATION_FLAGS = frozenset(['-E', '-S', '-cc1', '-M', '-MM', '-###'])

# Linker related options written together with their value (eg.: `-lm`).
_LINKER_FLAG_PATTERN = re.compile(r'^-(l|L|Wl,).+')

# Arguments which do not start with a dash might be file names.
_FILE_LIKE_PATTERN = re.compile(r'^[^-].+')

# Executable names which denote a C++ compiler (eg.: `g++`, `clang++-3.8`).
_CPLUSPLUS_PATTERN = re.compile(r'^(.+)(\+\+)(-.+|)$')


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command: the compiler invocation as a list of strings, where the first
             element is the executable.

    The value on success is a named tuple with the following attributes:

    files:    list of source files
    flags:    list of compile options
    compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when the command does not run the compilation pass (eg.: `-E`), or
    when no source file was found among the arguments. """

    # quit right now, if the program was not a C/C++ compiler
    compiler = compiler_language(command)
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in _NON_COMPILATION_FLAGS:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # skip the parameters of the flag too. the default value keeps
            # a truncated command line (flag without its parameter at the
            # very end) from raising StopIteration.
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        elif _LINKER_FLAG_PATTERN.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # the value is missing only when the command line is truncated,
            # in which case the lonely flag is kept as it was given.
            if value is not None:
                flags.append(value)
        # parameter which looks like a source file is taken...
        elif _FILE_LIKE_PATTERN.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    return _Compilation(compiler, flags, files) if files else None


def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    filename:   name or path of the file, only the extension is inspected
                (case sensitive).
    c_compiler: when True the ambiguous `.c` and `.i` extensions are taken
                as C, otherwise as C++.

    Returns the language name (eg.: 'c', 'c++', 'objective-c') or None
    when the extension is not a known source file extension. """

    mapping = {
        '.c': 'c' if c_compiler else 'c++',
        '.i': 'c-cpp-output' if c_compiler else 'c++-cpp-output',
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++'
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    command: the invocation as a list of strings, only the base name of
             the first element (the executable) is inspected.

    Returns 'c' or 'c++' when it matches a known compiler name pattern.
    None otherwise (also for an empty command). """

    if command:
        executable = os.path.basename(command[0])
        if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
            return 'c++' if _CPLUSPLUS_PATTERN.match(executable) else 'c'
    return None