# -*- coding: utf-8 -*-
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module implements the 'scan-build' command API.

To run the static analyzer against a build is done in multiple steps:

 -- Intercept: capture the compilation command during the build,
 -- Analyze:   run the analyzer against the captured commands,
 -- Report:    create a cover report from the analyzer outputs.  """

import re
import os
import os.path
import json
import logging
import multiprocessing
import tempfile
import functools
import subprocess
import contextlib
import datetime
import shutil
import glob
from collections import defaultdict

from libscanbuild import command_entry_point, compiler_wrapper, \
    wrapper_environment, run_build, run_command, CtuConfig
from libscanbuild.arguments import parse_args_for_scan_build, \
    parse_args_for_analyze_build
from libscanbuild.intercept import capture
from libscanbuild.report import document
from libscanbuild.compilation import split_command, classify_source, \
    compiler_language
from libscanbuild.clang import get_version, get_arguments, get_triple_arch, \
    ClangErrorException
from libscanbuild.shell import decode

__all__ = ['scan_build', 'analyze_build', 'analyze_compiler_wrapper']

COMPILER_WRAPPER_CC = 'analyze-cc'
COMPILER_WRAPPER_CXX = 'analyze-c++'

CTU_EXTDEF_MAP_FILENAME = 'externalDefMap.txt'
CTU_TEMP_DEFMAP_FOLDER = 'tmpExternalDefMaps'


@command_entry_point
def scan_build():
    """ Entry point for scan-build command.

    Returns the number of bugs found when 'args.status_bugs' is set,
    otherwise the exit code of the intercepted/wrapped build. """

    args = parse_args_for_scan_build()
    # will re-assign the report directory as new output
    with report_directory(
            args.output, args.keep_empty, args.output_format) as args.output:
        # Run against a build command. there are cases, when analyzer run
        # is not required. But we need to set up everything for the
        # wrappers, because 'configure' needs to capture the CC/CXX values
        # for the Makefile.
        if args.intercept_first:
            # Run build command with intercept module.
            exit_code = capture(args)
            # Run the analyzer against the captured commands.
            if need_analyzer(args.build):
                govern_analyzer_runs(args)
        else:
            # Run build command and analyzer with compiler wrappers.
            environment = setup_environment(args)
            exit_code = run_build(args.build, env=environment)
        # Cover report generation and bug counting.
        number_of_bugs = document(args)
        # Set exit status as it was requested.
        return number_of_bugs if args.status_bugs else exit_code


@command_entry_point
def analyze_build():
    """ Entry point for analyze-build command.

    Returns the number of bugs found when 'args.status_bugs' is set,
    otherwise zero. """

    args = parse_args_for_analyze_build()
    # will re-assign the report directory as new output
    with report_directory(
            args.output, args.keep_empty, args.output_format) as args.output:
        # Run the analyzer against a compilation db.
        govern_analyzer_runs(args)
        # Cover report generation and bug counting.
        number_of_bugs = document(args)
        # Set exit status as it was requested.
        return number_of_bugs if args.status_bugs else 0


def need_analyzer(args):
    """ Check the intent of the build command.

    When static analyzer run against project configure step, it should be
    silent and no need to run the analyzer or generate report.

    To run `scan-build` against the configure step might be necessary,
    when compiler wrappers are used. That's the moment when build setup
    check the compiler and capture the location for the build process.

    args -- the build command as a sequence of strings; only the first
            element is inspected. """

    return len(args) and not re.search(r'configure|autogen', args[0])


def prefix_with(constant, pieces):
    """ From a sequence create another sequence where every second element
    is from the original sequence and the odd elements are the prefix.

    eg.: prefix_with(0, [1,2,3]) creates [0, 1, 0, 2, 0, 3] """

    return [elem for piece in pieces for elem in [constant, piece]]


def get_ctu_config_from_args(args):
    """ CTU configuration is created from the chosen phases and dir.

    When 'args' carries no usable 'ctu_phases' attribute a disabled
    configuration (no collect, no analyze, empty strings) is returned. """

    return (
        CtuConfig(collect=args.ctu_phases.collect,
                  analyze=args.ctu_phases.analyze,
                  dir=args.ctu_dir,
                  extdef_map_cmd=args.extdef_map_cmd)
        if hasattr(args, 'ctu_phases') and hasattr(args.ctu_phases, 'dir')
        else CtuConfig(collect=False, analyze=False, dir='', extdef_map_cmd=''))


def get_ctu_config_from_json(ctu_conf_json):
    """ CTU configuration is created from the chosen phases and dir.

    ctu_conf_json -- JSON text of a four element array holding the
                     collect, analyze, dir and extdef_map_cmd values in
                     this order. """

    ctu_config = json.loads(ctu_conf_json)
    # Recover namedtuple from json when coming from analyze-cc or analyze-c++
    return CtuConfig(collect=ctu_config[0],
                     analyze=ctu_config[1],
                     dir=ctu_config[2],
                     extdef_map_cmd=ctu_config[3])


def create_global_ctu_extdef_map(extdef_map_lines):
    """ Takes iterator of individual external definition maps and creates a
    global map keeping only unique names. We leave conflicting names out of
    CTU.

    :param extdef_map_lines: Contains the id of a definition (mangled name) and
    the originating source (the corresponding AST file) name.
    :type extdef_map_lines: Iterator of str.
    :returns: Mangled name - AST file pairs.
    :rtype: List of (str, str) tuples.
    """

    mangled_to_asts = defaultdict(set)

    for line in extdef_map_lines:
        mangled_name, ast_file = line.strip().split(' ', 1)
        mangled_to_asts[mangled_name].add(ast_file)

    mangled_ast_pairs = []

    for mangled_name, ast_files in mangled_to_asts.items():
        # A name defined in more than one AST file is ambiguous: skip it.
        if len(ast_files) == 1:
            mangled_ast_pairs.append((mangled_name, next(iter(ast_files))))

    return mangled_ast_pairs


def merge_ctu_extdef_maps(ctudir):
    """ Merge individual external definition maps into a global one.

    As the collect phase runs parallel on multiple threads, all compilation
    units are separately mapped into a temporary file in CTU_TEMP_DEFMAP_FOLDER.
    These definition maps contain the mangled names and the source
    (AST generated from the source) which had their definition.
    These files should be merged at the end into a global map file:
    CTU_EXTDEF_MAP_FILENAME."""

    def generate_extdef_map_lines(extdefmap_dir):
        """ Iterate over all lines of input files in a determined order. """

        files = glob.glob(os.path.join(extdefmap_dir, '*'))
        files.sort()
        for filename in files:
            with open(filename, 'r') as in_file:
                for line in in_file:
                    yield line

    def write_global_map(arch, mangled_ast_pairs):
        """ Write (mangled name, ast file) pairs into final file. """

        extern_defs_map_file = os.path.join(ctudir, arch,
                                            CTU_EXTDEF_MAP_FILENAME)
        with open(extern_defs_map_file, 'w') as out_file:
            for mangled_name, ast_file in mangled_ast_pairs:
                out_file.write('%s %s\n' % (mangled_name, ast_file))

    # Every sub-directory of 'ctudir' is handled as one target architecture.
    triple_arches = glob.glob(os.path.join(ctudir, '*'))
    for triple_path in triple_arches:
        if os.path.isdir(triple_path):
            triple_arch = os.path.basename(triple_path)
            extdefmap_dir = os.path.join(ctudir, triple_arch,
                                         CTU_TEMP_DEFMAP_FOLDER)

            extdef_map_lines = generate_extdef_map_lines(extdefmap_dir)
            mangled_ast_pairs = create_global_ctu_extdef_map(extdef_map_lines)
            write_global_map(triple_arch, mangled_ast_pairs)

            # Remove all temporary files
            shutil.rmtree(extdefmap_dir, ignore_errors=True)


def run_analyzer_parallel(args):
    """ Runs the analyzer against the given compilation database. """

    def exclude(filename, directory):
        """ Return true when any excluded directory prefix the filename. """
        if not os.path.isabs(filename):
            # filename is either absolute or relative to directory. Need to
            # turn it to absolute since 'args.excludes' are absolute paths.
            filename = os.path.normpath(os.path.join(directory, filename))
        # The excludes are paths, not patterns: compare them literally so
        # that characters like '+', '.' or '\' in a directory name can not
        # be misread as regular expression syntax.
        return any(filename.startswith(exclude_directory)
                   for exclude_directory in args.excludes)

    consts = {
        'clang': args.clang,
        'output_dir': args.output,
        'output_format': args.output_format,
        'output_failures': args.output_failures,
        'direct_args': analyzer_params(args),
        'force_debug': args.force_debug,
        'ctu': get_ctu_config_from_args(args)
    }

    logging.debug('run analyzer against compilation database')
    with open(args.cdb, 'r') as handle:
        # Each compilation database entry is merged with the constants above.
        generator = (dict(cmd, **consts)
                     for cmd in json.load(handle) if not exclude(
                         cmd['file'], cmd['directory']))
        # when verbose output requested execute sequentially
        pool = multiprocessing.Pool(1 if args.verbose > 2 else None)
        for current in pool.imap_unordered(run, generator):
            if current is not None:
                # display error message from the static analyzer
                for line in current['error_output']:
                    logging.info(line.rstrip())
        pool.close()
        pool.join()


def govern_analyzer_runs(args):
    """ Governs multiple runs in CTU mode or runs once in normal mode. """

    ctu_config = get_ctu_config_from_args(args)
    # If we do a CTU collect (1st phase) we remove all previous collection
    # data first.
    if ctu_config.collect:
        shutil.rmtree(ctu_config.dir, ignore_errors=True)

    # If the user asked for a collect (1st) and analyze (2nd) phase, we do an
    # all-in-one run where we deliberately remove collection data before and
    # also after the run. If the user asks only for a single phase data is
    # left so multiple analyze runs can use the same data gathered by a single
    # collection run.
    if ctu_config.collect and ctu_config.analyze:
        # CTU strings are coming from args.ctu_dir and extdef_map_cmd,
        # so we can leave it empty
        args.ctu_phases = CtuConfig(collect=True, analyze=False,
                                    dir='', extdef_map_cmd='')
        run_analyzer_parallel(args)
        merge_ctu_extdef_maps(ctu_config.dir)
        args.ctu_phases = CtuConfig(collect=False, analyze=True,
                                    dir='', extdef_map_cmd='')
        run_analyzer_parallel(args)
        shutil.rmtree(ctu_config.dir, ignore_errors=True)
    else:
        # Single runs (collect or analyze) are launched from here.
        run_analyzer_parallel(args)
        if ctu_config.collect:
            merge_ctu_extdef_maps(ctu_config.dir)


def setup_environment(args):
    """ Set up environment for build command to interpose compiler wrapper.

    Returns a copy of the current process environment extended with the
    wrapper settings and the ANALYZE_BUILD_* variables. """

    environment = dict(os.environ)
    environment.update(wrapper_environment(args))
    environment.update({
        'CC': COMPILER_WRAPPER_CC,
        'CXX': COMPILER_WRAPPER_CXX,
        # An empty value tells the wrapper not to run the analyzer.
        'ANALYZE_BUILD_CLANG': args.clang if need_analyzer(args.build) else '',
        'ANALYZE_BUILD_REPORT_DIR': args.output,
        'ANALYZE_BUILD_REPORT_FORMAT': args.output_format,
        'ANALYZE_BUILD_REPORT_FAILURES': 'yes' if args.output_failures else '',
        'ANALYZE_BUILD_PARAMETERS': ' '.join(analyzer_params(args)),
        'ANALYZE_BUILD_FORCE_DEBUG': 'yes' if args.force_debug else '',
        'ANALYZE_BUILD_CTU': json.dumps(get_ctu_config_from_args(args))
    })
    return environment


@command_entry_point
def analyze_compiler_wrapper():
    """ Entry point for `analyze-cc` and `analyze-c++` compiler wrappers. """

    return compiler_wrapper(analyze_compiler_wrapper_impl)


def analyze_compiler_wrapper_impl(result, execution):
    """ Implements analyzer compiler wrapper functionality.

    result    -- outcome of the real compilation; a true value means the
                 compilation failed.
    execution -- the intercepted invocation; its 'cmd' and 'cwd' attributes
                 are used here. """

    # don't run analyzer when compilation fails. or when it's not requested.
    if result or not os.getenv('ANALYZE_BUILD_CLANG'):
        return

    # check is it a compilation?
    compilation = split_command(execution.cmd)
    if compilation is None:
        return
    # collect the needed parameters from environment, crash when missing
    parameters = {
        'clang': os.getenv('ANALYZE_BUILD_CLANG'),
        'output_dir': os.getenv('ANALYZE_BUILD_REPORT_DIR'),
        'output_format': os.getenv('ANALYZE_BUILD_REPORT_FORMAT'),
        'output_failures': os.getenv('ANALYZE_BUILD_REPORT_FAILURES'),
        'direct_args': os.getenv('ANALYZE_BUILD_PARAMETERS',
                                 '').split(' '),
        'force_debug': os.getenv('ANALYZE_BUILD_FORCE_DEBUG'),
        'directory': execution.cwd,
        'command': [execution.cmd[0], '-c'] + compilation.flags,
        'ctu': get_ctu_config_from_json(os.getenv('ANALYZE_BUILD_CTU'))
    }
    # call static analyzer against the compilation
    for source in compilation.files:
        # 'run' removes entries (eg.: 'command') from the dictionary it
        # receives. Hand over a fresh copy, with its own argument list, for
        # every source file, otherwise the second file of a multi-source
        # compilation would miss required keys.
        current_parameters = dict(parameters, file=source)
        current_parameters['direct_args'] = list(parameters['direct_args'])
        logging.debug('analyzer parameters %s', current_parameters)
        current = run(current_parameters)
        # display error message from the static analyzer
        if current is not None:
            for line in current['error_output']:
                logging.info(line.rstrip())


@contextlib.contextmanager
def report_directory(hint, keep, output_format):
    """ Responsible for the report directory.

    hint -- could specify the parent directory of the output directory.
    keep -- a boolean value to keep or delete the empty report directory.
    output_format -- the requested report format; only used to choose the
                     closing message. """

    stamp_format = 'scan-build-%Y-%m-%d-%H-%M-%S-%f-'
    stamp = datetime.datetime.now().strftime(stamp_format)
    parent_dir = os.path.abspath(hint)
    if not os.path.exists(parent_dir):
        os.makedirs(parent_dir)
    name = tempfile.mkdtemp(prefix=stamp, dir=parent_dir)

    logging.info('Report directory created: %s', name)

    try:
        yield name
    finally:
        # Arguments for the message below; one per '%s' in 'msg'.
        msg_args = (name,)
        if os.listdir(name):
            if output_format not in ['sarif', 'sarif-html']:  # FIXME:
                # 'scan-view' currently does not support sarif format.
                msg = "Run 'scan-view %s' to examine bug reports."
            elif output_format == 'sarif-html':
                msg = "Run 'scan-view %s' to examine bug reports or see " \
                    "merged sarif results at %s/results-merged.sarif."
                # This message mentions the directory twice.
                msg_args = (name, name)
            else:
                msg = "View merged sarif results at %s/results-merged.sarif."
            keep = True
        else:
            if keep:
                msg = "Report directory '%s' contains no report, but kept."
            else:
                msg = "Removing directory '%s' because it contains no report."
        logging.warning(msg, *msg_args)

        if not keep:
            os.rmdir(name)


def analyzer_params(args):
    """ A group of command line arguments can mapped to command
    line arguments of the analyzer. This method generates those.

    Returns a list of strings where every analyzer argument is preceded
    by '-Xclang'. """

    result = []

    if args.store_model:
        result.append('-analyzer-store={0}'.format(args.store_model))
    if args.constraints_model:
        result.append('-analyzer-constraints={0}'.format(
            args.constraints_model))
    if args.internal_stats:
        result.append('-analyzer-stats')
    if args.analyze_headers:
        result.append('-analyzer-opt-analyze-headers')
    if args.stats:
        result.append('-analyzer-checker=debug.Stats')
    if args.maxloop:
        result.extend(['-analyzer-max-loop', str(args.maxloop)])
    if args.output_format:
        result.append('-analyzer-output={0}'.format(args.output_format))
    if args.analyzer_config:
        result.extend(['-analyzer-config', args.analyzer_config])
    if args.verbose >= 4:
        result.append('-analyzer-display-progress')
    if args.plugins:
        result.extend(prefix_with('-load', args.plugins))
    if args.enable_checker:
        checkers = ','.join(args.enable_checker)
        result.extend(['-analyzer-checker', checkers])
    if args.disable_checker:
        checkers = ','.join(args.disable_checker)
        result.extend(['-analyzer-disable-checker', checkers])

    return prefix_with('-Xclang', result)


def require(required):
    """ Decorator for checking the required values in state.

    It checks the required attributes in the passed state and stop when
    any of those is missing.

    required -- the keys which must be present in the first positional
                argument of the decorated function; a KeyError is raised
                for the first missing one. """

    def decorator(function):
        @functools.wraps(function)
        def wrapper(*args, **kwargs):
            for key in required:
                if key not in args[0]:
                    raise KeyError('{0} not passed to {1}'.format(
                        key, function.__name__))

            return function(*args, **kwargs)

        return wrapper

    return decorator


@require(['command',  # entry from compilation database
          'directory',  # entry from compilation database
          'file',  # entry from compilation database
          'clang',  # clang executable name (and path)
          'direct_args',  # arguments from command line
          'force_debug',  # kill non debug macros
          'output_dir',  # where generated report files shall go
          'output_format',  # it's 'plist', 'html', 'plist-html',
                            # 'plist-multi-file', 'sarif', or 'sarif-html'
          'output_failures',  # generate crash reports or not
          'ctu'])  # ctu control options
def run(opts):
    """ Entry point to run (or not) static analyzer against a single entry
    of the compilation database.

    This complex task is decomposed into smaller methods which are calling
    each other in chain. If the analysis is not possible the given method
    just return and break the chain.

    The passed parameter is a python dictionary. Each method first check
    that the needed parameters received. (This is done by the 'require'
    decorator. It's like an 'assert' to check the contract between the
    caller and the called method.)

    Note that the 'command' entry is removed from the passed dictionary.
    Returns None when any exception was raised during the analysis. """

    try:
        command = opts.pop('command')
        command = command if isinstance(command, list) else decode(command)
        logging.debug("Run analyzer against '%s'", command)
        opts.update(classify_parameters(command))

        return arch_check(opts)
    except Exception:
        logging.error("Problem occurred during analysis.", exc_info=1)
        return None


@require(['clang', 'directory', 'flags', 'file', 'output_dir', 'language',
          'error_output', 'exit_code'])
def report_failure(opts):
    """ Create report when analyzer failed.

    The major report is the preprocessor output. The output filename generated
    randomly. The compiler output also captured into '.stderr.txt' file.
    And some more execution context also saved into '.info.txt' file. """

    def extension():
        """ Generate preprocessor file extension. """

        mapping = {'objective-c++': '.mii', 'objective-c': '.mi', 'c++': '.ii'}
        return mapping.get(opts['language'], '.i')

    def destination():
        """ Creates failures directory if not exits yet. """

        failures_dir = os.path.join(opts['output_dir'], 'failures')
        if not os.path.isdir(failures_dir):
            os.makedirs(failures_dir)
        return failures_dir

    # Classify error type: when Clang terminated by a signal it's a 'Crash'.
    # (python subprocess Popen.returncode is negative when child terminated
    # by signal.) Everything else is 'Other Error'.
    error = 'crash' if opts['exit_code'] < 0 else 'other_error'
    # Create preprocessor output file name. (This is blindly following the
    # Perl implementation.)
    (handle, name) = tempfile.mkstemp(suffix=extension(),
                                      prefix='clang_' + error + '_',
                                      dir=destination())
    os.close(handle)
    # Execute Clang again, but run the syntax check only.
    cwd = opts['directory']
    cmd = [opts['clang'], '-fsyntax-only', '-E'] + opts['flags'] + \
        [opts['file'], '-o', name]
    try:
        cmd = get_arguments(cmd, cwd)
        run_command(cmd, cwd=cwd)
    except (subprocess.CalledProcessError, ClangErrorException):
        # Deliberately ignored: the info and stderr files below are written
        # even when the preprocessor run fails.
        pass
    # write general information about the crash
    with open(name + '.info.txt', 'w') as handle:
        handle.write(opts['file'] + os.linesep)
        handle.write(error.title().replace('_', ' ') + os.linesep)
        handle.write(' '.join(cmd) + os.linesep)
        handle.write(' '.join(os.uname()) + os.linesep)
        handle.write(get_version(opts['clang']))
    # write the captured output too
    with open(name + '.stderr.txt', 'w') as handle:
        handle.writelines(opts['error_output'])


@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'output_dir',
          'output_format'])
def run_analyzer(opts, continuation=report_failure):
    """ It assembles the analysis command line and executes it. Capture the
    output of the analysis and returns with it. If failure reports are
    requested, it calls the continuation to generate it.

    Returns a dictionary with the 'error_output' and 'exit_code' keys. """

    def target():
        """ Creates output file name for reports. """
        if opts['output_format'] in {
                'plist',
                'plist-html',
                'plist-multi-file'}:
            (handle, name) = tempfile.mkstemp(prefix='report-',
                                              suffix='.plist',
                                              dir=opts['output_dir'])
            os.close(handle)
            return name
        elif opts['output_format'] in {
                'sarif',
                'sarif-html'}:
            (handle, name) = tempfile.mkstemp(prefix='result-',
                                              suffix='.sarif',
                                              dir=opts['output_dir'])
            os.close(handle)
            return name
        # Any other format gets the report directory itself as output.
        return opts['output_dir']

    try:
        cwd = opts['directory']
        cmd = get_arguments([opts['clang'], '--analyze'] +
                            opts['direct_args'] + opts['flags'] +
                            [opts['file'], '-o', target()],
                            cwd)
        output = run_command(cmd, cwd=cwd)
        return {'error_output': output, 'exit_code': 0}
    except subprocess.CalledProcessError as ex:
        result = {'error_output': ex.output, 'exit_code': ex.returncode}
        if opts.get('output_failures', False):
            opts.update(result)
            continuation(opts)
        return result
    except ClangErrorException as ex:
        result = {'error_output': ex.error, 'exit_code': 0}
        if opts.get('output_failures', False):
            opts.update(result)
            continuation(opts)
        return result


def extdef_map_list_src_to_ast(extdef_src_list):
    """ Turns textual external definition map list with source files into an
    external definition map list with ast files.

    extdef_src_list -- iterable of '<mangled name> <source path>' strings.

    Returns a list of '<mangled name> <ast path>' strings, where the ast
    path is the source path made relative, placed under 'ast' and extended
    with the '.ast' suffix. """

    extdef_ast_list = []
    for extdef_src_txt in extdef_src_list:
        mangled_name, path = extdef_src_txt.split(" ", 1)
        # Normalize path on windows as well
        path = os.path.splitdrive(path)[1]
        # Make relative path out of absolute ('startswith' is safe for an
        # empty path too)
        path = path[1:] if path.startswith(os.sep) else path
        ast_path = os.path.join("ast", path + ".ast")
        extdef_ast_list.append(mangled_name + " " + ast_path)
    return extdef_ast_list


@require(['clang', 'directory', 'flags', 'direct_args', 'file', 'ctu'])
def ctu_collect_phase(opts):
    """ Preprocess source by generating all data needed by CTU analysis. """

    def generate_ast(triple_arch):
        """ Generates ASTs for the current compilation command. """

        args = opts['direct_args'] + opts['flags']
        # The AST file mirrors the real path of the source (without the
        # leading separator) below '<ctu dir>/<triple arch>/ast'.
        ast_joined_path = os.path.join(opts['ctu'].dir, triple_arch, 'ast',
                                       os.path.realpath(opts['file'])[1:] +
                                       '.ast')
        ast_path = os.path.abspath(ast_joined_path)
        ast_dir = os.path.dirname(ast_path)
        if not os.path.isdir(ast_dir):
            try:
                os.makedirs(ast_dir)
            except OSError:
                # In case an other process already created it.
                pass
        ast_command = [opts['clang'], '-emit-ast']
        ast_command.extend(args)
        ast_command.append('-w')
        ast_command.append(opts['file'])
        ast_command.append('-o')
        ast_command.append(ast_path)
        logging.debug("Generating AST using '%s'", ast_command)
        run_command(ast_command, cwd=opts['directory'])

    def map_extdefs(triple_arch):
        """ Generate external definition map file for the current source. """

        args = opts['direct_args'] + opts['flags']
        extdefmap_command = [opts['ctu'].extdef_map_cmd]
        extdefmap_command.append(opts['file'])
        extdefmap_command.append('--')
        extdefmap_command.extend(args)
        logging.debug("Generating external definition map using '%s'",
                      extdefmap_command)
        extdef_src_list = run_command(extdefmap_command, cwd=opts['directory'])
        extdef_ast_list = extdef_map_list_src_to_ast(extdef_src_list)
        extern_defs_map_folder = os.path.join(opts['ctu'].dir, triple_arch,
                                              CTU_TEMP_DEFMAP_FOLDER)
        if not os.path.isdir(extern_defs_map_folder):
            try:
                os.makedirs(extern_defs_map_folder)
            except OSError:
                # In case an other process already created it.
                pass
        if extdef_ast_list:
            # The file is kept (delete=False) after it is closed.
            with tempfile.NamedTemporaryFile(mode='w',
                                             dir=extern_defs_map_folder,
                                             delete=False) as out_file:
                out_file.write("\n".join(extdef_ast_list) + "\n")

    cwd = opts['directory']
    cmd = [opts['clang'], '--analyze'] + opts['direct_args'] + opts['flags'] \
        + [opts['file']]
    triple_arch = get_triple_arch(cmd, cwd)
    generate_ast(triple_arch)
    map_extdefs(triple_arch)


@require(['ctu'])
def dispatch_ctu(opts, continuation=run_analyzer):
    """ Execute only one phase of 2 phases of CTU if needed.

    In the collect phase the result of 'ctu_collect_phase' is returned and
    the continuation is not called. In the analyze phase the CTU analyzer
    options are added to 'direct_args' before the continuation is called. """

    ctu_config = opts['ctu']

    if ctu_config.collect or ctu_config.analyze:
        assert ctu_config.collect != ctu_config.analyze
        if ctu_config.collect:
            return ctu_collect_phase(opts)
        if ctu_config.analyze:
            cwd = opts['directory']
            cmd = [opts['clang'], '--analyze'] + opts['direct_args'] \
                + opts['flags'] + [opts['file']]
            triarch = get_triple_arch(cmd, cwd)
            ctu_options = ['ctu-dir=' + os.path.join(ctu_config.dir, triarch),
                           'experimental-enable-naive-ctu-analysis=true']
            analyzer_options = prefix_with('-analyzer-config', ctu_options)
            direct_options = prefix_with('-Xanalyzer', analyzer_options)
            # Build a new list instead of extending the received one, so a
            # list shared by the caller does not collect these options again
            # on every call.
            opts['direct_args'] = opts['direct_args'] + direct_options

    return continuation(opts)


@require(['flags', 'force_debug'])
def filter_debug_flags(opts, continuation=dispatch_ctu):
    """ Filter out nondebug macros when requested.

    The 'force_debug' entry is removed from the passed dictionary. """

    if opts.pop('force_debug'):
        # lazy implementation just append an undefine macro at the end
        opts.update({'flags': opts['flags'] + ['-UNDEBUG']})

    return continuation(opts)


@require(['language', 'compiler', 'file', 'flags'])
def language_check(opts, continuation=filter_debug_flags):
    """ Find out the language from command line parameters or file name
    extension. The decision also influenced by the compiler invocation. """

    accepted = frozenset({
        'c', 'c++', 'objective-c', 'objective-c++', 'c-cpp-output',
        'c++-cpp-output', 'objective-c-cpp-output'
    })

    # language can be given as a parameter...
    language = opts.pop('language')
    compiler = opts.pop('compiler')
    # ...
or find out from source file extension 7077330f729Sjoerg if language is None and compiler is not None: 7087330f729Sjoerg language = classify_source(opts['file'], compiler == 'c') 7097330f729Sjoerg 7107330f729Sjoerg if language is None: 7117330f729Sjoerg logging.debug('skip analysis, language not known') 7127330f729Sjoerg return None 7137330f729Sjoerg elif language not in accepted: 7147330f729Sjoerg logging.debug('skip analysis, language not supported') 7157330f729Sjoerg return None 7167330f729Sjoerg else: 7177330f729Sjoerg logging.debug('analysis, language: %s', language) 7187330f729Sjoerg opts.update({'language': language, 7197330f729Sjoerg 'flags': ['-x', language] + opts['flags']}) 7207330f729Sjoerg return continuation(opts) 7217330f729Sjoerg 7227330f729Sjoerg 7237330f729Sjoerg@require(['arch_list', 'flags']) 7247330f729Sjoergdef arch_check(opts, continuation=language_check): 7257330f729Sjoerg """ Do run analyzer through one of the given architectures. """ 7267330f729Sjoerg 7277330f729Sjoerg disabled = frozenset({'ppc', 'ppc64'}) 7287330f729Sjoerg 7297330f729Sjoerg received_list = opts.pop('arch_list') 7307330f729Sjoerg if received_list: 7317330f729Sjoerg # filter out disabled architectures and -arch switches 7327330f729Sjoerg filtered_list = [a for a in received_list if a not in disabled] 7337330f729Sjoerg if filtered_list: 7347330f729Sjoerg # There should be only one arch given (or the same multiple 7357330f729Sjoerg # times). If there are multiple arch are given and are not 7367330f729Sjoerg # the same, those should not change the pre-processing step. 7377330f729Sjoerg # But that's the only pass we have before run the analyzer. 
7387330f729Sjoerg current = filtered_list.pop() 7397330f729Sjoerg logging.debug('analysis, on arch: %s', current) 7407330f729Sjoerg 7417330f729Sjoerg opts.update({'flags': ['-arch', current] + opts['flags']}) 7427330f729Sjoerg return continuation(opts) 7437330f729Sjoerg else: 7447330f729Sjoerg logging.debug('skip analysis, found not supported arch') 7457330f729Sjoerg return None 7467330f729Sjoerg else: 7477330f729Sjoerg logging.debug('analysis, on default arch') 7487330f729Sjoerg return continuation(opts) 7497330f729Sjoerg 7507330f729Sjoerg 7517330f729Sjoerg# To have good results from static analyzer certain compiler options shall be 7527330f729Sjoerg# omitted. The compiler flag filtering only affects the static analyzer run. 7537330f729Sjoerg# 7547330f729Sjoerg# Keys are the option name, value number of options to skip 7557330f729SjoergIGNORED_FLAGS = { 7567330f729Sjoerg '-c': 0, # compile option will be overwritten 7577330f729Sjoerg '-fsyntax-only': 0, # static analyzer option will be overwritten 7587330f729Sjoerg '-o': 1, # will set up own output file 7597330f729Sjoerg # flags below are inherited from the perl implementation. 7607330f729Sjoerg '-g': 0, 7617330f729Sjoerg '-save-temps': 0, 7627330f729Sjoerg '-install_name': 1, 7637330f729Sjoerg '-exported_symbols_list': 1, 7647330f729Sjoerg '-current_version': 1, 7657330f729Sjoerg '-compatibility_version': 1, 7667330f729Sjoerg '-init': 1, 7677330f729Sjoerg '-e': 1, 7687330f729Sjoerg '-seg1addr': 1, 7697330f729Sjoerg '-bundle_loader': 1, 7707330f729Sjoerg '-multiply_defined': 1, 7717330f729Sjoerg '-sectorder': 3, 7727330f729Sjoerg '--param': 1, 7737330f729Sjoerg '--serialize-diagnostics': 1 7747330f729Sjoerg} 7757330f729Sjoerg 7767330f729Sjoerg 7777330f729Sjoergdef classify_parameters(command): 7787330f729Sjoerg """ Prepare compiler flags (filters some and add others) and take out 7797330f729Sjoerg language (-x) and architecture (-arch) flags for future processing. 
""" 7807330f729Sjoerg 7817330f729Sjoerg result = { 7827330f729Sjoerg 'flags': [], # the filtered compiler flags 7837330f729Sjoerg 'arch_list': [], # list of architecture flags 7847330f729Sjoerg 'language': None, # compilation language, None, if not specified 7857330f729Sjoerg 'compiler': compiler_language(command) # 'c' or 'c++' 7867330f729Sjoerg } 7877330f729Sjoerg 7887330f729Sjoerg # iterate on the compile options 7897330f729Sjoerg args = iter(command[1:]) 7907330f729Sjoerg for arg in args: 7917330f729Sjoerg # take arch flags into a separate basket 7927330f729Sjoerg if arg == '-arch': 7937330f729Sjoerg result['arch_list'].append(next(args)) 7947330f729Sjoerg # take language 7957330f729Sjoerg elif arg == '-x': 7967330f729Sjoerg result['language'] = next(args) 7977330f729Sjoerg # parameters which looks source file are not flags 7987330f729Sjoerg elif re.match(r'^[^-].+', arg) and classify_source(arg): 7997330f729Sjoerg pass 8007330f729Sjoerg # ignore some flags 8017330f729Sjoerg elif arg in IGNORED_FLAGS: 8027330f729Sjoerg count = IGNORED_FLAGS[arg] 8037330f729Sjoerg for _ in range(count): 8047330f729Sjoerg next(args) 8057330f729Sjoerg # we don't care about extra warnings, but we should suppress ones 8067330f729Sjoerg # that we don't want to see. 8077330f729Sjoerg elif re.match(r'^-W.+', arg) and not re.match(r'^-Wno-.+', arg): 8087330f729Sjoerg pass 8097330f729Sjoerg # and consider everything else as compilation flag. 8107330f729Sjoerg else: 8117330f729Sjoerg result['flags'].append(arg) 8127330f729Sjoerg 8137330f729Sjoerg return result 814