# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """

import re
import os
import collections

__all__ = ["split_command", "classify_source", "compiler_language"]

# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which ignores and
# classifies parameters). Please note that these are not the only parameters
# which might be ignored.
#
# Keys are the option names, values are the number of following arguments
# to skip together with the option.
IGNORED_FLAGS = {
    # compiling only flag, ignored because the creator of compilation
    # database will explicitly set it.
    "-c": 0,
    # preprocessor dependency generation flags, ignored because they would
    # cause duplicate entries in the output (the only difference would be
    # these flags). this is an actual finding from users, who suffered longer
    # execution time caused by the duplicates.
    "-MD": 0,
    "-MMD": 0,
    "-MG": 0,
    "-MP": 0,
    "-MF": 1,
    "-MT": 1,
    "-MQ": 1,
    # linker options, ignored because the compilation database will contain
    # compilation commands only. so, the compiler would ignore these flags
    # anyway. the benefit of getting rid of them is to make the output more
    # readable.
    "-static": 0,
    "-shared": 0,
    "-s": 0,
    "-rdynamic": 0,
    "-l": 1,
    "-L": 1,
    "-u": 1,
    "-z": 1,
    "-T": 1,
    "-Xlinker": 1,
}

# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset(
    [
        re.compile(r"^(intercept-|analyze-|)c(c|\+\+)$"),
        re.compile(r"^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$"),
        re.compile(r"^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$"),
        re.compile(r"^llvm-g(cc|\+\+)$"),
    ]
)

# Value type returned by `split_command`.
Compilation = collections.namedtuple("Compilation", ["compiler", "flags", "files"])

# Options which make `split_command` give up on the command entirely.
_NON_COMPILATION_FLAGS = frozenset(["-E", "-S", "-cc1", "-M", "-MM", "-###"])

# Options which take a separate value that could look like a file name.
_FLAGS_WITH_VALUE = frozenset(["-D", "-I"])

# Linker related options glued together with their value (eg.: `-lm`).
_JOINED_LINKER_FLAG = re.compile(r"^-(l|L|Wl,).+")

# Arguments which are not options (do not start with a dash).
_NOT_AN_OPTION = re.compile(r"^[^-].+")

# Executable names which refer to a C++ compiler (eg.: `g++`, `clang++-3.8`).
_CPLUSPLUS = re.compile(r"^(.+)(\+\+)(-.+|)$")


def split_command(command):
    """Returns a value when the command is a compilation, None otherwise.

    :param command: the compiler invocation as a list of strings, where the
                    first element is the executable.
    :return: None when the executable is not a known C/C++ compiler, when
             the compilation pass is not involved, or when no source file
             was found among the arguments.

    The value on success is a named tuple with the following attributes:

    compiler:   string value of 'c' or 'c++'
    flags:      list of compile options
    files:      list of source files"""

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in _NON_COMPILATION_FLAGS:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # the default value keeps a truncated command line (option
            # without its value at the very end) from raising StopIteration.
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        elif _JOINED_LINKER_FLAG.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in _FLAGS_WITH_VALUE:
            flags.append(arg)
            value = next(args, None)
            # a trailing option without value is kept as it is.
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif _NOT_AN_OPTION.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return Compilation(compiler=language, flags=flags, files=files)


def classify_source(filename, c_compiler=True):
    """Return the language from file name extension.

    :param filename:    the file name (or path) to classify.
    :param c_compiler:  decides how the ambiguous '.c' and '.i' extensions
                        are mapped: to the C languages when true, to the
                        C++ ones otherwise.
    :return: the language name as string, or None when the extension is
             not a known source file extension."""

    mapping = {
        ".c": "c" if c_compiler else "c++",
        ".i": "c-cpp-output" if c_compiler else "c++-cpp-output",
        ".ii": "c++-cpp-output",
        ".m": "objective-c",
        ".mi": "objective-c-cpp-output",
        ".mm": "objective-c++",
        ".mii": "objective-c++-cpp-output",
        ".C": "c++",
        ".cc": "c++",
        ".CC": "c++",
        ".cp": "c++",
        ".cpp": "c++",
        ".cxx": "c++",
        ".c++": "c++",
        ".C++": "c++",
        ".txx": "c++",
    }

    __, extension = os.path.splitext(os.path.basename(filename))
    return mapping.get(extension)


def compiler_language(command):
    """A predicate to decide the command is a compiler call or not.

    :param command: the invocation as a list of strings, where the first
                    element is the executable. Might be empty or None.
    :return: 'c' or 'c++' when the executable name matches one of the
             `COMPILER_PATTERNS`. None otherwise."""

    if command:
        executable = os.path.basename(command[0])
        if any(pattern.match(executable) for pattern in COMPILER_PATTERNS):
            return "c++" if _CPLUSPLUS.match(executable) else "c"
    return None