xref: /llvm-project/clang/tools/scan-build-py/lib/libscanbuild/compilation.py (revision dd3c26a045c081620375a878159f536758baba6e)
1d9cf8291SDaniel Hwang# -*- coding: utf-8 -*-
2d9cf8291SDaniel Hwang# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3d9cf8291SDaniel Hwang# See https://llvm.org/LICENSE.txt for license information.
4d9cf8291SDaniel Hwang# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """
6d9cf8291SDaniel Hwang
7d9cf8291SDaniel Hwangimport re
8d9cf8291SDaniel Hwangimport os
9d9cf8291SDaniel Hwangimport collections
10d9cf8291SDaniel Hwang
# Names exported by a star-import of this module.
__all__ = ["split_command", "classify_source", "compiler_language"]
12d9cf8291SDaniel Hwang
# Compiler options dropped while creating the compilation database.
# `split_command` consults this map when it filters and classifies the
# parameters. Please note that this is not the complete list of parameters
# which might be ignored.
#
# Each key is an option name; the value is the number of arguments following
# the option which are skipped together with it.
IGNORED_FLAGS = {
    option: skip
    for skip, options in (
        # compile-only flag, ignored because the creator of the compilation
        # database will explicitly set it.
        (0, ("-c",)),
        # preprocessor (dependency output) flags, ignored because they would
        # cause duplicate entries in the output (the only difference would be
        # these flags). This is an actual finding from users, who suffered
        # longer execution times caused by the duplicates.
        (0, ("-MD", "-MMD", "-MG", "-MP")),
        (1, ("-MF", "-MT", "-MQ")),
        # linker options, ignored because the compilation database contains
        # compilation commands only, so the compiler would ignore these flags
        # anyway. Dropping them makes the output more readable.
        (0, ("-static", "-shared", "-s", "-rdynamic")),
        (1, ("-l", "-L", "-u", "-z", "-T", "-Xlinker")),
    )
    for option in options
}
49d9cf8291SDaniel Hwang
# Regular expressions matching the executable names of known C/C++ compilers.
COMPILER_PATTERNS = frozenset(
    re.compile(expression)
    for expression in (
        r"^(intercept-|analyze-|)c(c|\+\+)$",
        r"^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$",
        r"^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$",
        r"^llvm-g(cc|\+\+)$",
    )
)
59d9cf8291SDaniel Hwang
60d9cf8291SDaniel Hwang
# Record type returned by `split_command`. It is created once, so every
# successful call returns an instance of the same type.
_Compilation = collections.namedtuple("Compilation", ["compiler", "flags", "files"])


def split_command(command):
    """Returns a value when the command is a compilation, None otherwise.

    The value on success is a named tuple with the following attributes:

        compiler: string value of 'c' or 'c++'
        flags:    list of compile options
        files:    list of source files

    None is returned when the executable is not a known C/C++ compiler,
    when the command does not run a compilation pass (eg.: '-E', '-M') or
    when no source file was found among the arguments.

    A flag which expects a value but happens to be the last element of the
    command (eg.: a dangling '-MF' or '-D') is tolerated: the missing value
    is simply not consumed, instead of leaking a StopIteration exception
    to the caller."""

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None

    flags = []
    files = []
    # iterate on the compile options; an explicit iterator is used because
    # some branches consume the value(s) following the current option
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {"-E", "-S", "-cc1", "-M", "-MM", "-###"}:
            return None
        # ignore some flags, together with the values which belong to them
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default stops a truncated command line from raising
                next(args, None)
        # linker related flags with the value glued to the option
        elif re.match(r"^-(l|L|Wl,).+", arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {"-D", "-I"}:
            flags.append(arg)
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r"^[^-].+", arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=language, flags=flags, files=files)
102d9cf8291SDaniel Hwang
103d9cf8291SDaniel Hwang
def classify_source(filename, c_compiler=True):
    """Return the language which belongs to the extension of the file name.

    The '.c' and '.i' extensions are taken as C sources when `c_compiler`
    is true, and as C++ sources otherwise. The lookup is case sensitive.
    None is returned for an unknown (or missing) extension."""

    # these two depend on which compiler driver was invoked
    if c_compiler:
        plain_source, preprocessed_source = "c", "c-cpp-output"
    else:
        plain_source, preprocessed_source = "c++", "c++-cpp-output"

    # every extension which is C++ unconditionally
    languages = dict.fromkeys(
        (".C", ".cc", ".CC", ".cp", ".cpp", ".cxx", ".c++", ".C++", ".txx"),
        "c++",
    )
    languages.update(
        {
            ".c": plain_source,
            ".i": preprocessed_source,
            ".ii": "c++-cpp-output",
            ".m": "objective-c",
            ".mi": "objective-c-cpp-output",
            ".mm": "objective-c++",
            ".mii": "objective-c++-cpp-output",
        }
    )

    suffix = os.path.splitext(os.path.basename(filename))[1]
    return languages.get(suffix)
128d9cf8291SDaniel Hwang
129d9cf8291SDaniel Hwang
def compiler_language(command):
    """A predicate to decide whether the command is a compiler call or not.

    Only the base name of the first element of `command` is inspected.
    Returns 'c' or 'c++' when it matches one of the known compiler name
    patterns. Returns None otherwise, or when the command is empty."""

    if not command:
        return None

    program = os.path.basename(command[0])
    for known in COMPILER_PATTERNS:
        if known.match(program):
            break
    else:
        # none of the known compiler names matched
        return None

    # a '++' in the name, optionally followed by a '-suffix', means C++
    if re.match(r"^(.+)(\+\+)(-.+|)$", program):
        return "c++"
    return "c"
141d9cf8291SDaniel Hwang    return None
142