xref: /netbsd-src/external/apache2/llvm/dist/clang/tools/scan-build-py/libscanbuild/compilation.py (revision 7330f729ccf0bd976a06f95fad452fe774fc7fd1)
1*7330f729Sjoerg# -*- coding: utf-8 -*-
2*7330f729Sjoerg# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3*7330f729Sjoerg# See https://llvm.org/LICENSE.txt for license information.
4*7330f729Sjoerg# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for parsing a compiler invocation. """
6*7330f729Sjoerg
7*7330f729Sjoergimport re
8*7330f729Sjoergimport os
9*7330f729Sjoergimport collections
10*7330f729Sjoerg
11*7330f729Sjoerg__all__ = ['split_command', 'classify_source', 'compiler_language']
12*7330f729Sjoerg
# Compiler options which are left out when the compilation database is
# created. `split_command` consults this table while it filters and
# classifies the arguments. (The table is not exhaustive: other parameters
# might be ignored as well.)
#
# Each key is an option name; the value is the number of arguments following
# the option which are skipped together with it.
_FLAG_ONLY = 0
_FLAG_AND_VALUE = 1

IGNORED_FLAGS = {
    # compile-only switch: whoever creates the compilation database sets it
    # explicitly, so the one from the command line is dropped.
    '-c': _FLAG_ONLY,
    # dependency output options of the preprocessor: entries which differ
    # only in these flags would be duplicates in the output. users reported
    # longer execution time caused by such duplicates.
    '-MD': _FLAG_ONLY,
    '-MMD': _FLAG_ONLY,
    '-MG': _FLAG_ONLY,
    '-MP': _FLAG_ONLY,
    '-MF': _FLAG_AND_VALUE,
    '-MT': _FLAG_AND_VALUE,
    '-MQ': _FLAG_AND_VALUE,
    # linker options: the compilation database holds compilation commands
    # only, so the compiler would ignore these anyway. dropping them makes
    # the output more readable.
    '-static': _FLAG_ONLY,
    '-shared': _FLAG_ONLY,
    '-s': _FLAG_ONLY,
    '-rdynamic': _FLAG_ONLY,
    '-l': _FLAG_AND_VALUE,
    '-L': _FLAG_AND_VALUE,
    '-u': _FLAG_AND_VALUE,
    '-z': _FLAG_AND_VALUE,
    '-T': _FLAG_AND_VALUE,
    '-Xlinker': _FLAG_AND_VALUE,
}
49*7330f729Sjoerg
# Patterns for the executable names of the known C/C++ compilers. A name is
# taken as a compiler when any one of these matches it.
COMPILER_PATTERNS = frozenset(
    re.compile(pattern) for pattern in (
        # plain cc/c++ and the intercept-/analyze- wrappers
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # gcc/g++ (and mcc/m++) with optional dash separated prefixes and
        # optional version suffix
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # clang/clang++ with optional dash separated prefixes and optional
        # version suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # llvm-gcc/llvm-g++
        r'^llvm-g(cc|\+\+)$',
    ))
57*7330f729Sjoerg
58*7330f729Sjoerg
# Record type returned by `split_command`. It is created once here, instead
# of creating a new class on every call.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The value on success is a named tuple with the following attributes:

        compiler: string value of 'c' or 'c++'
        flags:    list of compile options
        files:    list of source files

    None is returned when the executable is not a known C/C++ compiler,
    when one of '-E', '-S', '-cc1', '-M', '-MM' or '-###' is present (no
    compilation pass is involved), or when no source file was found among
    the arguments.

    A flag which expects a value but stands as the last argument (like a
    trailing '-MF' or '-I') is tolerated: the missing value is taken as
    absent instead of leaking StopIteration to the caller. """

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None
    flags = []
    files = []
    # iterate on the compile options. an explicit iterator is used, because
    # some branches consume the value which follows the current option.
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with the values which belong to them
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default value stops `next` from raising when the
                # command line ends before the expected value
                next(args, None)
        # linker related flags with the value glued to them are dropped too
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=language, flags=flags, files=files)
101*7330f729Sjoerg
102*7330f729Sjoerg
def classify_source(filename, c_compiler=True):
    """ Return the language which belongs to the extension of the file name.

    The '.c' and '.i' extensions are taken as C input when `c_compiler` is
    true, and as C++ input otherwise. The lookup is case sensitive. None is
    returned for an unknown (or missing) extension. """

    suffix = os.path.splitext(os.path.basename(filename))[1]

    # these two depend on the kind of the compiler
    if suffix == '.c':
        return 'c' if c_compiler else 'c++'
    if suffix == '.i':
        return 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    # the rest means the same for every compiler
    languages = {
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++',
    }
    return languages.get(suffix)
127*7330f729Sjoerg
128*7330f729Sjoerg
def compiler_language(command):
    """ A predicate to decide whether the command is a compiler call.

    Only the base name of the first element of the command is looked at.
    Returns 'c++' when it matches a known compiler pattern and has '++' in
    it (either at the end, or followed by a dash separated suffix), 'c' when
    it matches a known compiler pattern otherwise. Returns None for an empty
    command and for any other executable. """

    if not command:
        return None

    program = os.path.basename(command[0])
    if not any(known.match(program) for known in COMPILER_PATTERNS):
        return None

    if re.match(r'^(.+)(\+\+)(-.+|)$', program):
        return 'c++'
    return 'c'
140*7330f729Sjoerg    return None
141