xref: /openbsd-src/gnu/llvm/clang/tools/scan-build-py/lib/libscanbuild/compilation.py (revision a9ac8606c53d55cee9c3a39778b249c51df111ef)
1*a9ac8606Spatrick# -*- coding: utf-8 -*-
2*a9ac8606Spatrick# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3*a9ac8606Spatrick# See https://llvm.org/LICENSE.txt for license information.
4*a9ac8606Spatrick# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """
6*a9ac8606Spatrick
7*a9ac8606Spatrickimport re
8*a9ac8606Spatrickimport os
9*a9ac8606Spatrickimport collections
10*a9ac8606Spatrick
11*a9ac8606Spatrick__all__ = ['split_command', 'classify_source', 'compiler_language']
12*a9ac8606Spatrick
# Compiler options which are left out when a compilation database entry is
# created. The `split_command` method consults this map while it classifies
# the parameters. (Please note, these are not the only parameters which
# that method might drop.)
#
# Each key is an option name. The value is the number of parameters after
# the option which have to be dropped together with it.
IGNORED_FLAGS = {
    option: skip_count
    for skip_count, options in (
        # options which stand alone
        (0, (
            # compile-only switch: whoever creates the compilation database
            # will set it explicitly.
            '-c',
            # dependency generation switches: keeping them would produce
            # duplicate entries in the output (entries which differ only in
            # these flags). users reported longer execution time caused by
            # such duplicates.
            '-MD', '-MMD', '-MG', '-MP',
            # linker switches: the compilation database contains compilation
            # commands only, so the compiler would ignore them anyway.
            # dropping them makes the output more readable.
            '-static', '-shared', '-s', '-rdynamic',
        )),
        # options which take their value from the next parameter
        (1, (
            # dependency generation switches (same reason as above)
            '-MF', '-MT', '-MQ',
            # linker switches (same reason as above)
            '-l', '-L', '-u', '-z', '-T', '-Xlinker',
        )),
    )
    for option in options
}
49*a9ac8606Spatrick
# Regular expressions which recognise the executable name (without the
# directory part) of the known C/C++ compilers.
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # `cc` or `c++`, optionally prefixed with `intercept-` or `analyze-`
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # `gcc`, `g++`, `mcc` or `m++`, with optional dash separated prefixes
        # and an optional `-<version>` suffix
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # `clang` or `clang++`, with the same optional prefixes and suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # `llvm-gcc` or `llvm-g++`
        r'^llvm-g(cc|\+\+)$',
    )
)
57*a9ac8606Spatrick
58*a9ac8606Spatrick
# The value type returned by `split_command`. It is created once here,
# instead of creating a new type on every call.
Compilation = collections.namedtuple('Compilation',
                                     ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    The command is a list of strings, where the first element is the
    executable. When an option which takes a value is the very last
    element (eg.: a trailing '-MF' or '-D'), the missing value is tolerated
    instead of leaking a StopIteration exception to the caller. """

    compiler = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not compiler:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags
        elif arg in IGNORED_FLAGS:
            # drop the values of the flag too. the default value makes
            # `next` safe when the command ends before the values.
            for _ in range(IGNORED_FLAGS[arg]):
                next(args, None)
        # linker related flags, written together with their value
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            # keep the lonely flag when the command ends without its value
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return Compilation(compiler=compiler, flags=flags, files=files)
101*a9ac8606Spatrick
102*a9ac8606Spatrick
def classify_source(filename, c_compiler=True):
    """ Return the language which belongs to the file name extension.

    filename:   name of the file, the directory part is not considered
    c_compiler: tells how to take the '.c' and '.i' extensions. those are
                C language files when it's true, C++ otherwise.

    Returns the name of the language, or None when the extension is not
    a known one. """

    extension = os.path.splitext(os.path.basename(filename))[1]

    # only these two extensions depend on the kind of the compiler
    if extension == '.c':
        return 'c' if c_compiler else 'c++'
    if extension == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # C++ sources are written with many different extensions
    if extension in ('.C', '.cc', '.CC', '.cp', '.cpp', '.cxx', '.c++',
                     '.C++', '.txx'):
        return 'c++'

    # the rest are preprocessed C++ and the Objective-C family
    return {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output'
    }.get(extension)
127*a9ac8606Spatrick
128*a9ac8606Spatrick
def compiler_language(command):
    """ A predicate to decide whether the command is a compiler call.

    The command is a list of strings, where the first element is the
    executable. Only the file name part of the executable is checked
    against COMPILER_PATTERNS.

    Returns 'c' or 'c++' when it matches. None otherwise (an empty
    command included). """

    if not command:
        return None

    program = os.path.basename(command[0])
    for candidate in COMPILER_PATTERNS:
        if candidate.match(program):
            # it is a C++ compiler when the name has '++' in it, which
            # is either the end of the name or followed by a '-' suffix
            if re.match(r'^(.+)(\+\+)(-.+|)$', program):
                return 'c++'
            return 'c'
    return None
141