xref: /llvm-project/clang/tools/scan-build-py/lib/libscanbuild/analyze.py (revision ff4abe755279a3a47cc416ef80dbc900d9a98a19)
1# -*- coding: utf-8 -*-
2# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3# See https://llvm.org/LICENSE.txt for license information.
4# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5""" This module implements the 'scan-build' command API.
6
Running the static analyzer against a build is done in multiple steps:
8
9 -- Intercept: capture the compilation command during the build,
10 -- Analyze:   run the analyzer against the captured commands,
11 -- Report:    create a cover report from the analyzer outputs.  """
12
import contextlib
import datetime
import functools
import glob
import json
import logging
import multiprocessing
import os
import os.path
import re
import shutil
import subprocess
import sys
import tempfile
from collections import defaultdict

from libscanbuild import command_entry_point, compiler_wrapper, \
    wrapper_environment, run_build, run_command, CtuConfig
from libscanbuild.arguments import parse_args_for_scan_build, \
    parse_args_for_analyze_build
from libscanbuild.intercept import capture
from libscanbuild.report import document
from libscanbuild.compilation import split_command, classify_source, \
    compiler_language
from libscanbuild.clang import get_version, get_arguments, get_triple_arch, \
    ClangErrorException
from libscanbuild.shell import decode
39
40__all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']
41
# Directory containing the scan-build executable; the compiler wrapper
# scripts live in the sibling 'libexec' directory.
# (Was `__import__('sys')` — use the regular top-of-file import instead.)
scanbuild_dir = os.path.dirname(os.path.realpath(sys.argv[0]))

# Compiler wrappers that intercept CC/CXX invocations and run the analyzer.
COMPILER_WRAPPER_CC = os.path.join(scanbuild_dir, '..', 'libexec', 'analyze-cc')
COMPILER_WRAPPER_CXX = os.path.join(scanbuild_dir, '..', 'libexec', 'analyze-c++')

# File name of the merged external definition map used by CTU analysis.
CTU_EXTDEF_MAP_FILENAME = 'externalDefMap.txt'
# Folder collecting the per-TU definition maps before they are merged.
CTU_TEMP_DEFMAP_FOLDER = 'tmpExternalDefMaps'
49
50
@command_entry_point
def scan_build():
    """ Entry point for scan-build command. """

    args = parse_args_for_scan_build()
    # The report directory is created up front and becomes the new output.
    with report_directory(
            args.output, args.keep_empty, args.output_format) as args.output:
        # A build might be a 'configure' step only. In that case no analyzer
        # run is needed, yet the wrapper environment still has to be set up,
        # because 'configure' captures the CC/CXX values for the Makefile.
        if args.intercept_first:
            # First capture the compilation commands with the intercept
            # module, then replay them through the analyzer if needed.
            exit_code = capture(args)
            if need_analyzer(args.build):
                govern_analyzer_runs(args)
        else:
            # Let the compiler wrappers run the analyzer during the build.
            exit_code = run_build(args.build, env=setup_environment(args))
        # Generate the cover report and count the bugs found.
        number_of_bugs = document(args)
        # Report the bug count as exit status when the user asked for it.
        return number_of_bugs if args.status_bugs else exit_code
77
78
@command_entry_point
def analyze_build():
    """ Entry point for analyze-build command. """

    args = parse_args_for_analyze_build()
    # The report directory is created up front and becomes the new output.
    with report_directory(args.output, args.keep_empty,
                          args.output_format) as args.output:
        # Replay the compilation database through the analyzer.
        govern_analyzer_runs(args)
        # Generate the cover report and count the bugs found.
        number_of_bugs = document(args)
        # Report the bug count as exit status when the user asked for it.
        return number_of_bugs if args.status_bugs else 0
92
93
def need_analyzer(args):
    """ Check the intent of the build command.

    When the static analyzer runs against a project's configure step, it
    should be silent: there is no need to run the analyzer or generate a
    report. Running `scan-build` against the configure step might still be
    necessary when compiler wrappers are used, because that is the moment
    when the build setup checks the compiler and captures its location.

    :param args: the build command as a list of strings.
    :returns: True when the analyzer should run for this build command. """

    # bool(args) instead of len(args): the original leaked the integer 0
    # for an empty command line instead of returning a boolean.
    return bool(args) and not re.search(r'configure|autogen', args[0])
105
106
def prefix_with(constant, pieces):
    """ Interleave a constant before every element of a sequence.

    eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """

    result = []
    for piece in pieces:
        result.append(constant)
        result.append(piece)
    return result
114
115
def get_ctu_config_from_args(args):
    """ Build the CTU configuration from the parsed command line arguments. """

    # CTU is only configured when the argument parser provided the phase
    # attributes; otherwise fall back to a fully disabled configuration.
    if hasattr(args, 'ctu_phases') and hasattr(args.ctu_phases, 'dir'):
        return CtuConfig(collect=args.ctu_phases.collect,
                         analyze=args.ctu_phases.analyze,
                         dir=args.ctu_dir,
                         extdef_map_cmd=args.extdef_map_cmd)
    return CtuConfig(collect=False, analyze=False, dir='', extdef_map_cmd='')
126
127
def get_ctu_config_from_json(ctu_conf_json):
    """ Rebuild the CTU configuration from its JSON representation.

    The compiler wrappers (analyze-cc, analyze-c++) receive the CtuConfig
    namedtuple serialized as a JSON list of its four fields. """

    collect, analyze, ctu_dir, extdef_map_cmd = json.loads(ctu_conf_json)
    return CtuConfig(collect=collect,
                     analyze=analyze,
                     dir=ctu_dir,
                     extdef_map_cmd=extdef_map_cmd)
137
138
def create_global_ctu_extdef_map(extdef_map_lines):
    """ Takes iterator of individual external definition maps and creates a
    global map keeping only unique names. We leave conflicting names out of
    CTU.

    :param extdef_map_lines: Contains the id of a definition (mangled name)
    and the originating source (the corresponding AST file) name.
    :type extdef_map_lines: Iterator of str.
    :returns: Mangled name - AST file pairs.
    :rtype: List of (str, str) tuples.
    """

    mangled_to_asts = defaultdict(set)
    for entry in extdef_map_lines:
        name, ast_file = entry.strip().split(' ', 1)
        mangled_to_asts[name].add(ast_file)

    # A mangled name that maps to more than one AST file is ambiguous,
    # so it is excluded from the global map.
    return [(name, next(iter(ast_files)))
            for name, ast_files in mangled_to_asts.items()
            if len(ast_files) == 1]
164
165
def merge_ctu_extdef_maps(ctudir):
    """ Merge individual external definition maps into a global one.

    As the collect phase runs parallel on multiple threads, all compilation
    units are separately mapped into a temporary file in
    CTU_TEMP_DEFMAP_FOLDER. These definition maps contain the mangled names
    and the source (AST generated from the source) which had their
    definition. These files should be merged at the end into a global map
    file: CTU_EXTDEF_MAP_FILENAME. """

    def generate_extdef_map_lines(extdefmap_dir):
        """ Yield the lines of every input file in a deterministic order. """

        for filename in sorted(glob.glob(os.path.join(extdefmap_dir, '*'))):
            with open(filename, 'r') as in_file:
                for line in in_file:
                    yield line

    def write_global_map(arch, mangled_ast_pairs):
        """ Write (mangled name, ast file) pairs into the final map file. """

        output_name = os.path.join(ctudir, arch, CTU_EXTDEF_MAP_FILENAME)
        with open(output_name, 'w') as out_file:
            out_file.writelines('%s %s\n' % pair for pair in mangled_ast_pairs)

    # One definition map is produced per target architecture.
    for triple_path in glob.glob(os.path.join(ctudir, '*')):
        if not os.path.isdir(triple_path):
            continue
        triple_arch = os.path.basename(triple_path)
        extdefmap_dir = os.path.join(ctudir, triple_arch,
                                     CTU_TEMP_DEFMAP_FOLDER)

        pairs = create_global_ctu_extdef_map(
            generate_extdef_map_lines(extdefmap_dir))
        write_global_map(triple_arch, pairs)

        # The per-TU maps are no longer needed once merged.
        shutil.rmtree(extdefmap_dir, ignore_errors=True)
208
209
def run_analyzer_parallel(args):
    """ Runs the analyzer against the given compilation database.

    Each database entry is analyzed in a worker process (sequentially when
    verbose output was requested); analyzer error output is relayed through
    logging. """

    def exclude(filename, directory):
        """ Return true when any excluded directory prefix the filename. """
        if not os.path.isabs(filename):
            # filename is either absolute or relative to directory. Need to turn
            # it to absolute since 'args.excludes' are absolute paths.
            filename = os.path.normpath(os.path.join(directory, filename))
        return any(re.match(r'^' + exclude_directory, filename)
                   for exclude_directory in args.excludes)

    # Parameters that are identical for every compilation database entry.
    consts = {
        'clang': args.clang,
        'output_dir': args.output,
        'output_format': args.output_format,
        'output_failures': args.output_failures,
        'direct_args': analyzer_params(args),
        'force_debug': args.force_debug,
        'ctu': get_ctu_config_from_args(args)
    }

    logging.debug('run analyzer against compilation database')
    with open(args.cdb, 'r') as handle:
        # Each generated dict is one compilation database entry merged with
        # the shared constants above; excluded files are filtered out here.
        generator = (dict(cmd, **consts)
                     for cmd in json.load(handle) if not exclude(
                            cmd['file'], cmd['directory']))
        # when verbose output requested execute sequentially
        pool = multiprocessing.Pool(1 if args.verbose > 2 else None)
        for current in pool.imap_unordered(run, generator):
            if current is not None:
                # display error message from the static analyzer
                for line in current['error_output']:
                    logging.info(line.rstrip())
        pool.close()
        pool.join()
246
247
def govern_analyzer_runs(args):
    """ Governs multiple runs in CTU mode or runs once in normal mode. """

    ctu_config = get_ctu_config_from_args(args)
    # A CTU collect (1st phase) always starts from a clean slate: drop any
    # data left behind by a previous collection.
    if ctu_config.collect:
        shutil.rmtree(ctu_config.dir, ignore_errors=True)

    if ctu_config.collect and ctu_config.analyze:
        # The user asked for both phases: do an all-in-one run and remove
        # the collection data afterwards as well. (When only one phase is
        # requested the data is left around, so several analyze runs can
        # reuse what a single collection run gathered.)
        # The CTU dir and extdef_map_cmd strings are read from the args
        # directly, so the phase tuples can carry empty strings here.
        args.ctu_phases = CtuConfig(collect=True, analyze=False,
                                    dir='', extdef_map_cmd='')
        run_analyzer_parallel(args)
        merge_ctu_extdef_maps(ctu_config.dir)
        args.ctu_phases = CtuConfig(collect=False, analyze=True,
                                    dir='', extdef_map_cmd='')
        run_analyzer_parallel(args)
        shutil.rmtree(ctu_config.dir, ignore_errors=True)
    else:
        # A single run: plain analysis, collect-only or analyze-only.
        run_analyzer_parallel(args)
        if ctu_config.collect:
            merge_ctu_extdef_maps(ctu_config.dir)
278
279
def setup_environment(args):
    """ Set up environment for build command to interpose compiler wrapper. """

    environment = dict(os.environ)
    environment.update(wrapper_environment(args))
    environment['CC'] = COMPILER_WRAPPER_CC
    environment['CXX'] = COMPILER_WRAPPER_CXX
    # An empty ANALYZE_BUILD_CLANG tells the wrapper to skip the analysis
    # (e.g. for a configure-only build).
    environment['ANALYZE_BUILD_CLANG'] = \
        args.clang if need_analyzer(args.build) else ''
    environment['ANALYZE_BUILD_REPORT_DIR'] = args.output
    environment['ANALYZE_BUILD_REPORT_FORMAT'] = args.output_format
    environment['ANALYZE_BUILD_REPORT_FAILURES'] = \
        'yes' if args.output_failures else ''
    environment['ANALYZE_BUILD_PARAMETERS'] = ' '.join(analyzer_params(args))
    environment['ANALYZE_BUILD_FORCE_DEBUG'] = \
        'yes' if args.force_debug else ''
    # The CtuConfig namedtuple travels to the wrapper as a JSON list.
    environment['ANALYZE_BUILD_CTU'] = \
        json.dumps(get_ctu_config_from_args(args))
    return environment
297
298
@command_entry_point
def analyze_compiler_wrapper():
    """ Entry point for `analyze-cc` and `analyze-c++` compiler wrappers.

    Delegates to the shared wrapper driver, which runs the real compiler
    first and then calls 'analyze_compiler_wrapper_impl'. """

    return compiler_wrapper(analyze_compiler_wrapper_impl)
304
305
def analyze_compiler_wrapper_impl(result, execution):
    """ Implements analyzer compiler wrapper functionality. """

    # Skip the analysis when the compilation failed, or when the analysis
    # was not requested at all.
    if result or not os.getenv('ANALYZE_BUILD_CLANG'):
        return

    # Only compiler invocations that actually compile are interesting.
    compilation = split_command(execution.cmd)
    if compilation is None:
        return

    # Gather the analyzer parameters from the environment set up by
    # setup_environment(); a missing variable is a hard error here.
    parameters = {
        'clang': os.getenv('ANALYZE_BUILD_CLANG'),
        'output_dir': os.getenv('ANALYZE_BUILD_REPORT_DIR'),
        'output_format': os.getenv('ANALYZE_BUILD_REPORT_FORMAT'),
        'output_failures': os.getenv('ANALYZE_BUILD_REPORT_FAILURES'),
        'direct_args': os.getenv('ANALYZE_BUILD_PARAMETERS',
                                 '').split(' '),
        'force_debug': os.getenv('ANALYZE_BUILD_FORCE_DEBUG'),
        'directory': execution.cwd,
        'command': [execution.cmd[0], '-c'] + compilation.flags,
        'ctu': get_ctu_config_from_json(os.getenv('ANALYZE_BUILD_CTU'))
    }
    # Run the static analyzer once per source file of the compilation.
    for source in compilation.files:
        parameters['file'] = source
        logging.debug('analyzer parameters %s', parameters)
        current = run(parameters)
        if current is not None:
            # Relay the analyzer's error output to the user.
            for line in current['error_output']:
                logging.info(line.rstrip())
339
340
@contextlib.contextmanager
def report_directory(hint, keep, output_format):
    """ Responsible for the report directory.

    hint -- could specify the parent directory of the output directory.
    keep -- a boolean value to keep or delete the empty report directory.

    Yields the created report directory name; on exit the directory is
    removed when it stayed empty and 'keep' was not requested. """

    stamp_format = 'scan-build-%Y-%m-%d-%H-%M-%S-%f-'
    stamp = datetime.datetime.now().strftime(stamp_format)
    parent_dir = os.path.abspath(hint)
    # exist_ok avoids the race of a separate exists() check when parallel
    # runs create the parent directory at the same time.
    os.makedirs(parent_dir, exist_ok=True)
    name = tempfile.mkdtemp(prefix=stamp, dir=parent_dir)

    logging.info('Report directory created: %s', name)

    try:
        yield name
    finally:
        args = (name,)
        if os.listdir(name):
            if output_format not in ['sarif', 'sarif-html']: # FIXME:
                # 'scan-view' currently does not support sarif format.
                msg = "Run 'scan-view %s' to examine bug reports."
            elif output_format == 'sarif-html':
                msg = "Run 'scan-view %s' to examine bug reports or see " \
                    "merged sarif results at %s/results-merged.sarif."
                args = (name, name)
            else:
                msg = "View merged sarif results at %s/results-merged.sarif."
            # Never delete a directory that holds reports.
            keep = True
        else:
            if keep:
                msg = "Report directory '%s' contains no report, but kept."
            else:
                msg = "Removing directory '%s' because it contains no report."
        logging.warning(msg, *args)

        if not keep:
            os.rmdir(name)
381
382
def analyzer_params(args):
    """ Map scan-build command line arguments onto static analyzer
    (frontend) arguments. Every generated option is prefixed with
    '-Xclang' so the clang driver forwards it to the frontend. """

    def interleave(flag, values):
        """ Place 'flag' in front of each element of 'values'. """
        return [entry for value in values for entry in (flag, value)]

    options = []

    if args.store_model:
        options.append('-analyzer-store={0}'.format(args.store_model))
    if args.constraints_model:
        options.append('-analyzer-constraints={0}'.format(
            args.constraints_model))
    if args.internal_stats:
        options.append('-analyzer-stats')
    if args.analyze_headers:
        options.append('-analyzer-opt-analyze-headers')
    if args.stats:
        options.append('-analyzer-checker=debug.Stats')
    if args.maxloop:
        options.extend(['-analyzer-max-loop', str(args.maxloop)])
    if args.output_format:
        options.append('-analyzer-output={0}'.format(args.output_format))
    if args.analyzer_config:
        options.extend(['-analyzer-config', args.analyzer_config])
    if args.verbose >= 4:
        options.append('-analyzer-display-progress')
    if args.plugins:
        options.extend(interleave('-load', args.plugins))
    if args.enable_checker:
        options.extend(['-analyzer-checker', ','.join(args.enable_checker)])
    if args.disable_checker:
        options.extend(
            ['-analyzer-disable-checker', ','.join(args.disable_checker)])

    return interleave('-Xclang', options)
418
419
def require(required):
    """ Decorator for checking the required values in state.

    It checks the required attributes in the passed state and stop when
    any of those is missing. """

    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            state = args[0]
            missing = [key for key in required if key not in state]
            if missing:
                raise KeyError('{0} not passed to {1}'.format(
                    missing[0], function.__name__))
            return function(*args, **kwargs)

        return wrapper

    return decorator
439
440
@require(['command',  # entry from compilation database
          'directory',  # entry from compilation database
          'file',  # entry from compilation database
          'clang',  # clang executable name (and path)
          'direct_args',  # arguments from command line
          'force_debug',  # kill non debug macros
          'output_dir',  # where generated report files shall go
          'output_format',  # it's 'plist', 'html', 'plist-html', 'plist-multi-file', 'sarif', or 'sarif-html'
          'output_failures',  # generate crash reports or not
          'ctu'])  # ctu control options
def run(opts):
    """ Entry point to run (or not) static analyzer against a single entry
    of the compilation database.

    The analysis is decomposed into a chain of small methods calling each
    other; every link checks its own preconditions (the 'require' decorator
    acts as an assert on the contract between caller and callee) and simply
    returns to break the chain when the analysis is not possible. """

    try:
        command = opts.pop('command')
        if not isinstance(command, list):
            command = decode(command)
        logging.debug("Run analyzer against '%s'", command)
        opts.update(classify_parameters(command))
        return arch_check(opts)
    except Exception:
        logging.error("Problem occurred during analysis.", exc_info=1)
        return None
474
475
@require(['clang', 'directory', 'flags', 'file', 'output_dir', 'language',
          'error_output', 'exit_code'])
def report_failure(opts):
    """ Create report when analyzer failed.

    The major report is the preprocessor output. The output filename generated
    randomly. The compiler output also captured into '.stderr.txt' file.
    And some more execution context also saved into '.info.txt' file. """

    def extension():
        """ Generate preprocessor file extension. """

        mapping = {'objective-c++': '.mii', 'objective-c': '.mi', 'c++': '.ii'}
        return mapping.get(opts['language'], '.i')

    def destination():
        """ Creates failures directory if not exits yet. """

        failures_dir = os.path.join(opts['output_dir'], 'failures')
        # exist_ok: another analyzer process may create it concurrently.
        os.makedirs(failures_dir, exist_ok=True)
        return failures_dir

    # Classify error type: when Clang terminated by a signal it's a 'Crash'.
    # (python subprocess Popen.returncode is negative when child terminated
    # by signal.) Everything else is 'Other Error'.
    error = 'crash' if opts['exit_code'] < 0 else 'other_error'
    # Create preprocessor output file name. (This is blindly following the
    # Perl implementation.)
    (handle, name) = tempfile.mkstemp(suffix=extension(),
                                      prefix='clang_' + error + '_',
                                      dir=destination())
    os.close(handle)
    # Execute Clang again, but run the syntax check only.
    cwd = opts['directory']
    cmd = [opts['clang'], '-fsyntax-only', '-E'] + opts['flags'] + \
        [opts['file'], '-o', name]
    try:
        cmd = get_arguments(cmd, cwd)
        run_command(cmd, cwd=cwd)
    except (subprocess.CalledProcessError, ClangErrorException):
        # A failure is expected here; the preprocessor output (if any) is
        # all we are after.
        pass
    # write general information about the crash (no explicit close needed:
    # the 'with' statement already closes the files)
    with open(name + '.info.txt', 'w') as handle:
        handle.write(opts['file'] + os.linesep)
        handle.write(error.title().replace('_', ' ') + os.linesep)
        handle.write(' '.join(cmd) + os.linesep)
        handle.write(' '.join(os.uname()) + os.linesep)
        handle.write(get_version(opts['clang']))
    # write the captured output too
    with open(name + '.stderr.txt', 'w') as handle:
        handle.writelines(opts['error_output'])
532
533
@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'output_dir',
          'output_format'])
def run_analyzer(opts, continuation=report_failure):
    """ It assembles the analysis command line and executes it. Capture the
    output of the analysis and returns with it. If failure reports are
    requested, it calls the continuation to generate it. """

    def target():
        """ Creates output file name for reports. """
        output_format = opts['output_format']
        if output_format in {'plist', 'plist-html', 'plist-multi-file'}:
            (handle, name) = tempfile.mkstemp(prefix='report-',
                                              suffix='.plist',
                                              dir=opts['output_dir'])
            os.close(handle)
            return name
        if output_format in {'sarif', 'sarif-html'}:
            (handle, name) = tempfile.mkstemp(prefix='result-',
                                              suffix='.sarif',
                                              dir=opts['output_dir'])
            os.close(handle)
            return name
        # html writes multiple files directly into the output directory
        return opts['output_dir']

    def report_run_failure(result):
        """ Generate a failure report when the user asked for one. """
        if opts.get('output_failures', False):
            opts.update(result)
            continuation(opts)

    try:
        cwd = opts['directory']
        cmd = get_arguments([opts['clang'], '--analyze'] +
                            opts['direct_args'] + opts['flags'] +
                            [opts['file'], '-o', target()],
                            cwd)
        output = run_command(cmd, cwd=cwd)
        return {'error_output': output, 'exit_code': 0}
    except subprocess.CalledProcessError as ex:
        result = {'error_output': ex.output, 'exit_code': ex.returncode}
        report_run_failure(result)
        return result
    except ClangErrorException as ex:
        result = {'error_output': ex.error, 'exit_code': 0}
        report_run_failure(result)
        return result
582
583
def extdef_map_list_src_to_ast(extdef_src_list):
    """ Turns textual external definition map list with source files into an
    external definition map list with ast files. """

    def ast_path_for(source_path):
        """ Map a source path to the matching AST path under 'ast/'. """
        # Normalize path on windows as well
        path = os.path.splitdrive(source_path)[1]
        # Make relative path out of absolute
        if path[0] == os.sep:
            path = path[1:]
        return os.path.join("ast", path + ".ast")

    result = []
    for entry in extdef_src_list:
        mangled_name, path = entry.split(" ", 1)
        result.append(mangled_name + " " + ast_path_for(path))
    return result
598
599
@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'ctu'])
def ctu_collect_phase(opts):
    """ Preprocess source by generating all data needed by CTU analysis. """

    def generate_ast(triple_arch):
        """ Generates ASTs for the current compilation command. """

        args = opts['direct_args'] + opts['flags']
        # The AST mirrors the source tree under <ctu-dir>/<arch>/ast/.
        # Dropping the first character of realpath() strips the leading
        # path separator, so the absolute source path can be used as a
        # relative path component.
        ast_joined_path = os.path.join(opts['ctu'].dir, triple_arch, 'ast',
                                       os.path.realpath(opts['file'])[1:] +
                                       '.ast')
        ast_path = os.path.abspath(ast_joined_path)
        ast_dir = os.path.dirname(ast_path)
        if not os.path.isdir(ast_dir):
            try:
                os.makedirs(ast_dir)
            except OSError:
                # In case an other process already created it.
                pass
        ast_command = [opts['clang'], '-emit-ast']
        ast_command.extend(args)
        # '-w' suppresses warnings; they were reported by the real build.
        ast_command.append('-w')
        ast_command.append(opts['file'])
        ast_command.append('-o')
        ast_command.append(ast_path)
        logging.debug("Generating AST using '%s'", ast_command)
        run_command(ast_command, cwd=opts['directory'])

    def map_extdefs(triple_arch):
        """ Generate external definition map file for the current source. """

        args = opts['direct_args'] + opts['flags']
        extdefmap_command = [opts['ctu'].extdef_map_cmd]
        extdefmap_command.append(opts['file'])
        # Everything after '--' is the compilation command for the tool.
        extdefmap_command.append('--')
        extdefmap_command.extend(args)
        logging.debug("Generating external definition map using '%s'",
                      extdefmap_command)
        extdef_src_list = run_command(extdefmap_command, cwd=opts['directory'])
        extdef_ast_list = extdef_map_list_src_to_ast(extdef_src_list)
        extern_defs_map_folder = os.path.join(opts['ctu'].dir, triple_arch,
                                             CTU_TEMP_DEFMAP_FOLDER)
        if not os.path.isdir(extern_defs_map_folder):
            try:
                os.makedirs(extern_defs_map_folder)
            except OSError:
                # In case an other process already created it.
                pass
        if extdef_ast_list:
            # A unique temporary file per compilation unit avoids locking;
            # the per-TU maps are merged later by merge_ctu_extdef_maps().
            with tempfile.NamedTemporaryFile(mode='w',
                                             dir=extern_defs_map_folder,
                                             delete=False) as out_file:
                out_file.write("\n".join(extdef_ast_list) + "\n")

    cwd = opts['directory']
    cmd = [opts['clang'], '--analyze'] + opts['direct_args'] + opts['flags'] \
        + [opts['file']]
    # Both the AST dump and the definition map are stored per target arch.
    triple_arch = get_triple_arch(cmd, cwd)
    generate_ast(triple_arch)
    map_extdefs(triple_arch)
660
661
@require(['ctu'])
def dispatch_ctu(opts, continuation=run_analyzer):
    """ Execute only one phase of 2 phases of CTU if needed. """

    ctu_config = opts['ctu']

    if ctu_config.collect or ctu_config.analyze:
        # The two phases are mutually exclusive within a single run.
        assert ctu_config.collect != ctu_config.analyze
        if ctu_config.collect:
            return ctu_collect_phase(opts)
        # Analyze phase: turn on naive CTU analysis pointing at the
        # previously collected data, then continue with the normal run.
        cwd = opts['directory']
        cmd = [opts['clang'], '--analyze'] + opts['direct_args'] \
            + opts['flags'] + [opts['file']]
        triarch = get_triple_arch(cmd, cwd)
        ctu_options = ['ctu-dir=' + os.path.join(ctu_config.dir, triarch),
                       'experimental-enable-naive-ctu-analysis=true']
        analyzer_options = prefix_with('-analyzer-config', ctu_options)
        direct_options = prefix_with('-Xanalyzer', analyzer_options)
        opts['direct_args'].extend(direct_options)

    return continuation(opts)
684
685
@require(['flags', 'force_debug'])
def filter_debug_flags(opts, continuation=dispatch_ctu):
    """ Filter out nondebug macros when requested. """

    force_debug = opts.pop('force_debug')
    if force_debug:
        # Lazy implementation: just undefine NDEBUG at the end of the
        # command line instead of filtering the individual -D flags.
        opts['flags'] = opts['flags'] + ['-UNDEBUG']

    return continuation(opts)
695
696
@require(['language', 'compiler', 'file', 'flags'])
def language_check(opts, continuation=filter_debug_flags):
    """ Find out the language from command line parameters or file name
    extension. The decision also influenced by the compiler invocation. """

    accepted = frozenset({
        'c', 'c++', 'objective-c', 'objective-c++', 'c-cpp-output',
        'c++-cpp-output', 'objective-c-cpp-output'
    })

    # An explicit -x flag wins; otherwise guess from the file extension,
    # influenced by whether a C or C++ compiler was invoked.
    language = opts.pop('language')
    compiler = opts.pop('compiler')
    if language is None and compiler is not None:
        language = classify_source(opts['file'], compiler == 'c')

    if language is None:
        logging.debug('skip analysis, language not known')
        return None
    if language not in accepted:
        logging.debug('skip analysis, language not supported')
        return None

    logging.debug('analysis, language: %s', language)
    opts['language'] = language
    opts['flags'] = ['-x', language] + opts['flags']
    return continuation(opts)
725
726
@require(['arch_list', 'flags'])
def arch_check(opts, continuation=language_check):
    """ Do run analyzer through one of the given architectures. """

    disabled = frozenset({'ppc', 'ppc64'})

    received_list = opts.pop('arch_list')
    if not received_list:
        logging.debug('analysis, on default arch')
        return continuation(opts)

    # Drop the architectures the analyzer cannot handle.
    filtered_list = [arch for arch in received_list if arch not in disabled]
    if not filtered_list:
        logging.debug('skip analysis, found not supported arch')
        return None

    # There should be only one arch given (or the same one multiple times).
    # Multiple differing arch flags would not change the pre-processing
    # step — and that is the only pass before the analyzer runs.
    current = filtered_list.pop()
    logging.debug('analysis, on arch: %s', current)
    opts['flags'] = ['-arch', current] + opts['flags']
    return continuation(opts)
753
754
# To have good results from static analyzer certain compiler options shall be
# omitted. The compiler flag filtering only affects the static analyzer run.
#
# Keys are the option name; the value is the number of following command
# line arguments to skip along with the option itself.
IGNORED_FLAGS = {
    '-c': 0,  # compile option will be overwritten
    '-fsyntax-only': 0,  # static analyzer option will be overwritten
    '-o': 1,  # will set up own output file
    # flags below are inherited from the perl implementation.
    '-g': 0,
    '-save-temps': 0,
    '-install_name': 1,
    '-exported_symbols_list': 1,
    '-current_version': 1,
    '-compatibility_version': 1,
    '-init': 1,
    '-e': 1,
    '-seg1addr': 1,
    '-bundle_loader': 1,
    '-multiply_defined': 1,
    '-sectorder': 3,
    '--param': 1,
    '--serialize-diagnostics': 1
}
779
780
def classify_parameters(command):
    """ Prepare compiler flags (filters some and add others) and take out
    language (-x) and architecture (-arch) flags for future processing. """

    result = {
        'flags': [],  # the filtered compiler flags
        'arch_list': [],  # list of architecture flags
        'language': None,  # compilation language, None, if not specified
        'compiler': compiler_language(command)  # 'c' or 'c++'
    }

    # Walk the compile options; the first element is the compiler itself.
    args = iter(command[1:])
    for arg in args:
        if arg == '-arch':
            # take arch flags into a separate basket
            result['arch_list'].append(next(args))
        elif arg == '-x':
            # take the explicit language flag out
            result['language'] = next(args)
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            # parameters which look like source files are not flags
            pass
        elif arg in IGNORED_FLAGS:
            # skip the ignored flag together with its arguments
            for _ in range(IGNORED_FLAGS[arg]):
                next(args)
        elif re.match(r'^-W.+', arg) and not re.match(r'^-Wno-.+', arg):
            # extra warnings do not matter for the analysis, but the
            # suppressions ('-Wno-...') must be kept to silence noise
            pass
        else:
            # everything else is treated as a compilation flag
            result['flags'].append(arg)

    return result
818