xref: /openbsd-src/gnu/llvm/clang/utils/analyzer/SATestBuild.py (revision 12c855180aad702bbcca06e0398d774beeafb155)
1e5dd7070Spatrick#!/usr/bin/env python
2e5dd7070Spatrick
3e5dd7070Spatrick"""
4e5dd7070SpatrickStatic Analyzer qualification infrastructure.
5e5dd7070Spatrick
6e5dd7070SpatrickThe goal is to test the analyzer against different projects,
7e5dd7070Spatrickcheck for failures, compare results, and measure performance.
8e5dd7070Spatrick
9e5dd7070SpatrickRepository Directory will contain sources of the projects as well as the
10e5dd7070Spatrickinformation on how to build them and the expected output.
11e5dd7070SpatrickRepository Directory structure:
12e5dd7070Spatrick   - ProjectMap file
13e5dd7070Spatrick   - Historical Performance Data
14e5dd7070Spatrick   - Project Dir1
15e5dd7070Spatrick     - ReferenceOutput
16e5dd7070Spatrick   - Project Dir2
17e5dd7070Spatrick     - ReferenceOutput
18e5dd7070Spatrick   ..
19e5dd7070SpatrickNote that the build tree must be inside the project dir.
20e5dd7070Spatrick
21e5dd7070SpatrickTo test the build of the analyzer one would:
   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
     the build directory does not pollute the repository to minimize network
     traffic).
25e5dd7070Spatrick   - Build all projects, until error. Produce logs to report errors.
26e5dd7070Spatrick   - Compare results.
27e5dd7070Spatrick
28e5dd7070SpatrickThe files which should be kept around for failure investigations:
29e5dd7070Spatrick   RepositoryCopy/Project DirI/ScanBuildResults
30e5dd7070Spatrick   RepositoryCopy/Project DirI/run_static_analyzer.log
31e5dd7070Spatrick
32e5dd7070SpatrickAssumptions (TODO: shouldn't need to assume these.):
33e5dd7070Spatrick   The script is being run from the Repository Directory.
34e5dd7070Spatrick   The compiler for scan-build and scan-build are in the PATH.
35e5dd7070Spatrick   export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
36e5dd7070Spatrick
For more logging, set the env variables:
38e5dd7070Spatrick   zaks:TI zaks$ export CCC_ANALYZER_LOG=1
39e5dd7070Spatrick   zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
40e5dd7070Spatrick
The list of checkers tested is hardcoded in the CHECKERS variable.
42e5dd7070SpatrickFor testing additional checkers, use the SA_ADDITIONAL_CHECKERS environment
43e5dd7070Spatrickvariable. It should contain a comma separated list.
44e5dd7070Spatrick"""
45e5dd7070Spatrickimport CmpRuns
46ec727ea7Spatrickimport SATestUtils as utils
47ec727ea7Spatrickfrom ProjectMap import DownloadType, ProjectInfo
48e5dd7070Spatrick
49e5dd7070Spatrickimport glob
50e5dd7070Spatrickimport logging
51e5dd7070Spatrickimport math
52e5dd7070Spatrickimport multiprocessing
53e5dd7070Spatrickimport os
54e5dd7070Spatrickimport plistlib
55e5dd7070Spatrickimport shutil
56e5dd7070Spatrickimport sys
57e5dd7070Spatrickimport threading
58e5dd7070Spatrickimport time
59ec727ea7Spatrickimport zipfile
60ec727ea7Spatrick
61ec727ea7Spatrickfrom queue import Queue
# mypy has problems finding InvalidFileException in the module,
# so this is how we can shush that false positive
64ec727ea7Spatrickfrom plistlib import InvalidFileException  # type:ignore
65ec727ea7Spatrickfrom subprocess import CalledProcessError, check_call
66ec727ea7Spatrickfrom typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
67ec727ea7Spatrick
68e5dd7070Spatrick
69e5dd7070Spatrick###############################################################################
70e5dd7070Spatrick# Helper functions.
71e5dd7070Spatrick###############################################################################
72e5dd7070Spatrick
class StreamToLogger:
    """
    Minimal file-like adapter that forwards written text to a logger.

    Every ``write`` call becomes one log record emitted at ``log_level``.
    """

    def __init__(self, logger: logging.Logger,
                 log_level: int = logging.INFO):
        self.logger, self.log_level = logger, log_level

    def write(self, message: str):
        # Trailing whitespace is dropped so that no extra newline is logged.
        stripped = message.rstrip()
        self.logger.log(self.log_level, stripped)

    def flush(self):
        # Nothing is buffered by this adapter, so there is nothing to flush.
        return None

    def fileno(self) -> int:
        # Always reports descriptor 0.
        return 0
88e5dd7070Spatrick
89e5dd7070Spatrick
90ec727ea7SpatrickLOCAL = threading.local()
91e5dd7070Spatrick
92e5dd7070Spatrick
def init_logger(name: str):
    """
    Set up the logger called ``name`` and attach it to the thread-local
    output streams.

    The logger level is set to DEBUG. LOCAL.stdout is replaced by a stream
    logging at INFO and LOCAL.stderr by a stream logging at ERROR.
    """
    # TODO: use debug levels for VERBOSE messages
    thread_logger = logging.getLogger(name)
    thread_logger.setLevel(logging.DEBUG)
    for attribute, level in (("stdout", logging.INFO),
                             ("stderr", logging.ERROR)):
        setattr(LOCAL, attribute, StreamToLogger(thread_logger, level))
99e5dd7070Spatrick
100e5dd7070Spatrick
101ec727ea7Spatrickinit_logger("main")
102ec727ea7Spatrick
103ec727ea7Spatrick
def stderr(message: str):
    """
    Write ``message`` to the current thread's LOCAL.stderr stream.
    """
    LOCAL.stderr.write(message)
106ec727ea7Spatrick
107ec727ea7Spatrick
def stdout(message: str):
    """
    Write ``message`` to the current thread's LOCAL.stdout stream.
    """
    LOCAL.stdout.write(message)
110ec727ea7Spatrick
111ec727ea7Spatrick
112ec727ea7Spatricklogging.basicConfig(
113ec727ea7Spatrick    format='%(asctime)s:%(levelname)s:%(name)s: %(message)s')
114ec727ea7Spatrick
115e5dd7070Spatrick
116e5dd7070Spatrick###############################################################################
117e5dd7070Spatrick# Configuration setup.
118e5dd7070Spatrick###############################################################################
119e5dd7070Spatrick
120e5dd7070Spatrick
# Find Clang for static analysis.
# The CC environment variable takes precedence; otherwise "clang" is looked
# up with utils.which using the PATH environment variable.
if 'CC' in os.environ:
    cc_candidate: Optional[str] = os.environ['CC']
else:
    cc_candidate = utils.which("clang", os.environ['PATH'])
# A missing or empty candidate aborts as soon as this module is executed.
if not cc_candidate:
    stderr("Error: cannot find 'clang' in PATH")
    sys.exit(1)

# The analyzer binary used by every build below.
CLANG = cc_candidate
131e5dd7070Spatrick
# Number of jobs: 75% of the reported CPU count, rounded up.
MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75))

# Names of the project specific scripts.
# The script that downloads the project.
DOWNLOAD_SCRIPT = "download_project.sh"
# The script that needs to be executed before the build can start.
CLEANUP_SCRIPT = "cleanup_run_static_analyzer.sh"
# This is a file containing commands for scan-build.
BUILD_SCRIPT = "run_static_analyzer.cmd"

# A comment in a build script which disables wrapping.
NO_PREFIX_CMD = "#NOPREFIX"

# The log directory and build log file names.
LOG_DIR_NAME = "Logs"
BUILD_LOG_NAME = "run_static_analyzer.log"
# Summary file - contains the summary of the failures. Ex: This info can be
# displayed when buildbot detects a build failure.
# NOTE(review): the constant below presumably caps how many failures are
# listed in that summary - confirm at its use site.
NUM_OF_FAILURES_IN_SUMMARY = 10

# The scan-build result directory. Reference builds use the same name
# prefixed with REF_PREFIX.
OUTPUT_DIR_NAME = "ScanBuildResults"
REF_PREFIX = "Ref"

# The name of the directory storing the cached project source. If this
# directory does not exist, the download script will be executed.
# That script should create the "CachedSource" directory and download the
# project source into it.
CACHED_SOURCE_DIR_NAME = "CachedSource"

# The name of the directory containing the source code that will be analyzed.
# Each time a project is analyzed, a fresh copy of its CachedSource directory
# will be copied to the PatchedSource directory and then the local patches
# in PATCHFILE_NAME will be applied (if PATCHFILE_NAME exists).
PATCHED_SOURCE_DIR_NAME = "PatchedSource"

# The name of the patchfile specifying any changes that should be applied
# to the CachedSource before analyzing.
PATCHFILE_NAME = "changes_for_analyzer.patch"

# The list of checkers used during analysis.
# Currently, consists of all the non-experimental checkers, plus a few alpha
# checkers we don't want to regress on.
CHECKERS = ",".join([
    "alpha.unix.SimpleStream",
    "alpha.security.taint",
    "cplusplus.NewDeleteLeaks",
    "core",
    "cplusplus",
    "deadcode",
    "security",
    "unix",
    "osx",
    "nullability"
])

# Verbosity level handed to helper calls such as utils.run_script.
VERBOSE = 0
190ec727ea7Spatrick
191e5dd7070Spatrick
192e5dd7070Spatrick###############################################################################
193e5dd7070Spatrick# Test harness logic.
194e5dd7070Spatrick###############################################################################
195e5dd7070Spatrick
196e5dd7070Spatrick
def run_cleanup_script(directory: str, build_log_file: IO):
    """
    Run pre-processing script if any.

    The script is CLEANUP_SCRIPT inside ``directory``; it is run with the
    patched source directory as its working directory.
    """
    script_path = os.path.join(directory, CLEANUP_SCRIPT)
    patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
    streams = {"out": LOCAL.stdout, "err": LOCAL.stderr}

    utils.run_script(script_path, build_log_file, patched_source,
                     verbose=VERBOSE, **streams)
207e5dd7070Spatrick
208e5dd7070Spatrick
class TestInfo(NamedTuple):
    """
    Information about a project and settings for its analysis.
    """
    # The project to build and analyze.
    project: ProjectInfo
    # When true, "--override-compiler" is added to the scan-build options.
    override_compiler: bool = False
    # Extra comma-separated entries appended to the analyzer config.
    extra_analyzer_config: str = ""
    # Extra comma-separated checkers appended to the enabled checkers.
    extra_checkers: str = ""
    # When true, results go to the "Ref"-prefixed output directory and are
    # not compared against anything.
    is_reference_build: bool = False
    # Passed through to run_cmp_results when comparing results.
    strictness: int = 0
219e5dd7070Spatrick
220e5dd7070Spatrick
# The typing package doesn't have a separate type for Queue, but has a
# generic stub. We still want to have a type-safe checked project queue;
# for this reason, we specify the generic type for mypy only.
#
# It is a common workaround for this situation:
# https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
if TYPE_CHECKING:
    TestQueue = Queue[TestInfo]  # this is only processed by mypy
else:
    TestQueue = Queue  # this will be executed at runtime
231e5dd7070Spatrick
232e5dd7070Spatrick
class RegressionTester:
    """
    A component aggregating all of the project testing.

    Each project is wrapped into a TestInfo carrying the shared settings and
    is then tested either sequentially or by a pool of worker threads,
    depending on ``jobs``.
    """

    def __init__(self, jobs: int, projects: List[ProjectInfo],
                 override_compiler: bool, extra_analyzer_config: str,
                 extra_checkers: str,
                 regenerate: bool, strictness: int):
        """
        :param jobs: number of worker threads; values <= 1 run sequentially.
        :param projects: the projects to test.
        :param override_compiler: forwarded to TestInfo.override_compiler.
        :param extra_analyzer_config: forwarded to
            TestInfo.extra_analyzer_config.
        :param extra_checkers: forwarded to TestInfo.extra_checkers.
        :param regenerate: forwarded to TestInfo.is_reference_build.
        :param strictness: forwarded to TestInfo.strictness (an int).
        """
        self.jobs = jobs
        self.projects = projects
        self.override_compiler = override_compiler
        self.extra_analyzer_config = extra_analyzer_config
        self.extra_checkers = extra_checkers
        self.regenerate = regenerate
        self.strictness = strictness

    def test_all(self) -> bool:
        """
        Test every project.

        :return: whether tests have passed.
        """
        # Keyword arguments make the regenerate -> is_reference_build mapping
        # explicit instead of relying on positional order.
        projects_to_test: List[TestInfo] = [
            TestInfo(project=project,
                     override_compiler=self.override_compiler,
                     extra_analyzer_config=self.extra_analyzer_config,
                     extra_checkers=self.extra_checkers,
                     is_reference_build=self.regenerate,
                     strictness=self.strictness)
            for project in self.projects
        ]

        if self.jobs <= 1:
            return self._single_threaded_test_all(projects_to_test)
        return self._multi_threaded_test_all(projects_to_test)

    def _single_threaded_test_all(self,
                                  projects_to_test: List[TestInfo]) -> bool:
        """
        Run all projects.
        :return: whether tests have passed.
        """
        success = True
        for project_info in projects_to_test:
            tester = ProjectTester(project_info)
            # Keep going after a failure so that every project gets tested.
            success &= tester.test()
        return success

    def _multi_threaded_test_all(self,
                                 projects_to_test: List[TestInfo]) -> bool:
        """
        Run each project in a separate thread.

        This is OK despite GIL, as testing is blocked
        on launching external processes.

        :return: whether tests have passed.
        """
        tasks_queue = TestQueue()

        for project_info in projects_to_test:
            tasks_queue.put(project_info)

        results_differ = threading.Event()
        failure_flag = threading.Event()

        for _ in range(self.jobs):
            worker = TestProjectThread(tasks_queue, results_differ,
                                       failure_flag)
            worker.start()

        # Required to handle Ctrl-C gracefully: poll instead of blocking on
        # the queue so the main thread stays interruptible.
        while tasks_queue.unfinished_tasks:
            time.sleep(0.1)  # Seconds.
            if failure_flag.is_set():
                stderr("Test runner crashed\n")
                sys.exit(1)
        return not results_differ.is_set()
307e5dd7070Spatrick
308e5dd7070Spatrick
309ec727ea7Spatrickclass ProjectTester:
310ec727ea7Spatrick    """
311ec727ea7Spatrick    A component aggregating testing for one project.
312ec727ea7Spatrick    """
313*12c85518Srobert
    def __init__(self, test_info: TestInfo, silent: bool = False):
        """
        Copy the project and its analysis settings out of ``test_info``.

        :param test_info: the project and the settings for its analysis.
        :param silent: stored as ``self.silent``.
        """
        self.project = test_info.project
        self.override_compiler = test_info.override_compiler
        self.extra_analyzer_config = test_info.extra_analyzer_config
        self.extra_checkers = test_info.extra_checkers
        self.is_reference_build = test_info.is_reference_build
        self.strictness = test_info.strictness
        self.silent = silent
322e5dd7070Spatrick
323ec727ea7Spatrick    def test(self) -> bool:
324ec727ea7Spatrick        """
325ec727ea7Spatrick        Test a given project.
326ec727ea7Spatrick        :return tests_passed: Whether tests have passed according
327ec727ea7Spatrick        to the :param strictness: criteria.
328ec727ea7Spatrick        """
329ec727ea7Spatrick        if not self.project.enabled:
330ec727ea7Spatrick            self.out(
331ec727ea7Spatrick                f" \n\n--- Skipping disabled project {self.project.name}\n")
332ec727ea7Spatrick            return True
333e5dd7070Spatrick
334ec727ea7Spatrick        self.out(f" \n\n--- Building project {self.project.name}\n")
335ec727ea7Spatrick
336ec727ea7Spatrick        start_time = time.time()
337ec727ea7Spatrick
338ec727ea7Spatrick        project_dir = self.get_project_dir()
339ec727ea7Spatrick        self.vout(f"  Build directory: {project_dir}.\n")
340ec727ea7Spatrick
341ec727ea7Spatrick        # Set the build results directory.
342ec727ea7Spatrick        output_dir = self.get_output_dir()
343ec727ea7Spatrick
344ec727ea7Spatrick        self.build(project_dir, output_dir)
345ec727ea7Spatrick        check_build(output_dir)
346ec727ea7Spatrick
347ec727ea7Spatrick        if self.is_reference_build:
348ec727ea7Spatrick            cleanup_reference_results(output_dir)
349ec727ea7Spatrick            passed = True
350e5dd7070Spatrick        else:
351ec727ea7Spatrick            passed = run_cmp_results(project_dir, self.strictness)
352ec727ea7Spatrick
353ec727ea7Spatrick        self.out(f"Completed tests for project {self.project.name} "
354ec727ea7Spatrick                 f"(time: {time.time() - start_time:.2f}).\n")
355ec727ea7Spatrick
356ec727ea7Spatrick        return passed
357ec727ea7Spatrick
358ec727ea7Spatrick    def get_project_dir(self) -> str:
359ec727ea7Spatrick        return os.path.join(os.path.abspath(os.curdir), self.project.name)
360ec727ea7Spatrick
361ec727ea7Spatrick    def get_output_dir(self) -> str:
362ec727ea7Spatrick        if self.is_reference_build:
363ec727ea7Spatrick            dirname = REF_PREFIX + OUTPUT_DIR_NAME
364ec727ea7Spatrick        else:
365ec727ea7Spatrick            dirname = OUTPUT_DIR_NAME
366ec727ea7Spatrick
367ec727ea7Spatrick        return os.path.join(self.get_project_dir(), dirname)
368ec727ea7Spatrick
369ec727ea7Spatrick    def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
370ec727ea7Spatrick        build_log_path = get_build_log_path(output_dir)
371ec727ea7Spatrick
372ec727ea7Spatrick        self.out(f"Log file: {build_log_path}\n")
373ec727ea7Spatrick        self.out(f"Output directory: {output_dir}\n")
374ec727ea7Spatrick
375ec727ea7Spatrick        remove_log_file(output_dir)
376ec727ea7Spatrick
377ec727ea7Spatrick        # Clean up scan build results.
378ec727ea7Spatrick        if os.path.exists(output_dir):
379ec727ea7Spatrick            self.vout(f"  Removing old results: {output_dir}\n")
380ec727ea7Spatrick
381ec727ea7Spatrick            shutil.rmtree(output_dir)
382ec727ea7Spatrick
383ec727ea7Spatrick        assert(not os.path.exists(output_dir))
384ec727ea7Spatrick        os.makedirs(os.path.join(output_dir, LOG_DIR_NAME))
385ec727ea7Spatrick
386ec727ea7Spatrick        # Build and analyze the project.
387ec727ea7Spatrick        with open(build_log_path, "w+") as build_log_file:
388ec727ea7Spatrick            if self.project.mode == 1:
389ec727ea7Spatrick                self._download_and_patch(directory, build_log_file)
390ec727ea7Spatrick                run_cleanup_script(directory, build_log_file)
391ec727ea7Spatrick                build_time, memory = self.scan_build(directory, output_dir,
392ec727ea7Spatrick                                                     build_log_file)
393ec727ea7Spatrick            else:
394ec727ea7Spatrick                build_time, memory = self.analyze_preprocessed(directory,
395ec727ea7Spatrick                                                               output_dir)
396ec727ea7Spatrick
397ec727ea7Spatrick            if self.is_reference_build:
398ec727ea7Spatrick                run_cleanup_script(directory, build_log_file)
399ec727ea7Spatrick                normalize_reference_results(directory, output_dir,
400ec727ea7Spatrick                                            self.project.mode)
401ec727ea7Spatrick
402ec727ea7Spatrick        self.out(f"Build complete (time: {utils.time_to_str(build_time)}, "
403ec727ea7Spatrick                 f"peak memory: {utils.memory_to_str(memory)}). "
404ec727ea7Spatrick                 f"See the log for more details: {build_log_path}\n")
405ec727ea7Spatrick
406ec727ea7Spatrick        return build_time, memory
407ec727ea7Spatrick
408ec727ea7Spatrick    def scan_build(self, directory: str, output_dir: str,
409ec727ea7Spatrick                   build_log_file: IO) -> Tuple[float, int]:
410ec727ea7Spatrick        """
411ec727ea7Spatrick        Build the project with scan-build by reading in the commands and
412ec727ea7Spatrick        prefixing them with the scan-build options.
413ec727ea7Spatrick        """
414ec727ea7Spatrick        build_script_path = os.path.join(directory, BUILD_SCRIPT)
415ec727ea7Spatrick        if not os.path.exists(build_script_path):
416ec727ea7Spatrick            stderr(f"Error: build script is not defined: "
417ec727ea7Spatrick                   f"{build_script_path}\n")
418ec727ea7Spatrick            sys.exit(1)
419ec727ea7Spatrick
420ec727ea7Spatrick        all_checkers = CHECKERS
421ec727ea7Spatrick        if 'SA_ADDITIONAL_CHECKERS' in os.environ:
422ec727ea7Spatrick            all_checkers = (all_checkers + ',' +
423ec727ea7Spatrick                            os.environ['SA_ADDITIONAL_CHECKERS'])
424*12c85518Srobert        if self.extra_checkers != "":
425*12c85518Srobert            all_checkers += "," + self.extra_checkers
426ec727ea7Spatrick
427ec727ea7Spatrick        # Run scan-build from within the patched source directory.
428ec727ea7Spatrick        cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
429ec727ea7Spatrick
430ec727ea7Spatrick        options = f"--use-analyzer '{CLANG}' "
431ec727ea7Spatrick        options += f"-plist-html -o '{output_dir}' "
432ec727ea7Spatrick        options += f"-enable-checker {all_checkers} "
433ec727ea7Spatrick        options += "--keep-empty "
434ec727ea7Spatrick        options += f"-analyzer-config '{self.generate_config()}' "
435ec727ea7Spatrick
436ec727ea7Spatrick        if self.override_compiler:
437ec727ea7Spatrick            options += "--override-compiler "
438ec727ea7Spatrick
439ec727ea7Spatrick        extra_env: Dict[str, str] = {}
440ec727ea7Spatrick
441ec727ea7Spatrick        execution_time = 0.0
442ec727ea7Spatrick        peak_memory = 0
443ec727ea7Spatrick
444ec727ea7Spatrick        try:
445ec727ea7Spatrick            command_file = open(build_script_path, "r")
446ec727ea7Spatrick            command_prefix = "scan-build " + options + " "
447ec727ea7Spatrick
448ec727ea7Spatrick            for command in command_file:
449ec727ea7Spatrick                command = command.strip()
450ec727ea7Spatrick
451ec727ea7Spatrick                if len(command) == 0:
452ec727ea7Spatrick                    continue
453ec727ea7Spatrick
454ec727ea7Spatrick                # Custom analyzer invocation specified by project.
455ec727ea7Spatrick                # Communicate required information using environment variables
456ec727ea7Spatrick                # instead.
457ec727ea7Spatrick                if command == NO_PREFIX_CMD:
458ec727ea7Spatrick                    command_prefix = ""
459ec727ea7Spatrick                    extra_env['OUTPUT'] = output_dir
460ec727ea7Spatrick                    extra_env['CC'] = CLANG
461ec727ea7Spatrick                    extra_env['ANALYZER_CONFIG'] = self.generate_config()
462ec727ea7Spatrick                    continue
463ec727ea7Spatrick
464ec727ea7Spatrick                if command.startswith("#"):
465ec727ea7Spatrick                    continue
466ec727ea7Spatrick
467ec727ea7Spatrick                # If using 'make', auto imply a -jX argument
468ec727ea7Spatrick                # to speed up analysis.  xcodebuild will
469ec727ea7Spatrick                # automatically use the maximum number of cores.
470ec727ea7Spatrick                if (command.startswith("make ") or command == "make") and \
471ec727ea7Spatrick                        "-j" not in command:
472ec727ea7Spatrick                    command += f" -j{MAX_JOBS}"
473ec727ea7Spatrick
474ec727ea7Spatrick                command_to_run = command_prefix + command
475ec727ea7Spatrick
476ec727ea7Spatrick                self.vout(f"  Executing: {command_to_run}\n")
477ec727ea7Spatrick
478ec727ea7Spatrick                time, mem = utils.check_and_measure_call(
479ec727ea7Spatrick                    command_to_run, cwd=cwd,
480ec727ea7Spatrick                    stderr=build_log_file,
481ec727ea7Spatrick                    stdout=build_log_file,
482ec727ea7Spatrick                    env=dict(os.environ, **extra_env),
483ec727ea7Spatrick                    shell=True)
484ec727ea7Spatrick
485ec727ea7Spatrick                execution_time += time
486ec727ea7Spatrick                peak_memory = max(peak_memory, mem)
487ec727ea7Spatrick
488ec727ea7Spatrick        except CalledProcessError:
489ec727ea7Spatrick            stderr("Error: scan-build failed. Its output was: \n")
490ec727ea7Spatrick            build_log_file.seek(0)
491ec727ea7Spatrick            shutil.copyfileobj(build_log_file, LOCAL.stderr)
492ec727ea7Spatrick            sys.exit(1)
493ec727ea7Spatrick
494ec727ea7Spatrick        return execution_time, peak_memory
495ec727ea7Spatrick
496ec727ea7Spatrick    def analyze_preprocessed(self, directory: str,
497ec727ea7Spatrick                             output_dir: str) -> Tuple[float, int]:
498ec727ea7Spatrick        """
499ec727ea7Spatrick        Run analysis on a set of preprocessed files.
500ec727ea7Spatrick        """
501ec727ea7Spatrick        if os.path.exists(os.path.join(directory, BUILD_SCRIPT)):
502ec727ea7Spatrick            stderr(f"Error: The preprocessed files project "
503ec727ea7Spatrick                   f"should not contain {BUILD_SCRIPT}\n")
504ec727ea7Spatrick            raise Exception()
505ec727ea7Spatrick
506ec727ea7Spatrick        prefix = CLANG + " --analyze "
507ec727ea7Spatrick
508ec727ea7Spatrick        prefix += "--analyzer-output plist "
509ec727ea7Spatrick        prefix += " -Xclang -analyzer-checker=" + CHECKERS
510ec727ea7Spatrick        prefix += " -fcxx-exceptions -fblocks "
511ec727ea7Spatrick        prefix += " -Xclang -analyzer-config "
512ec727ea7Spatrick        prefix += f"-Xclang {self.generate_config()} "
513ec727ea7Spatrick
514ec727ea7Spatrick        if self.project.mode == 2:
515ec727ea7Spatrick            prefix += "-std=c++11 "
516ec727ea7Spatrick
517ec727ea7Spatrick        plist_path = os.path.join(directory, output_dir, "date")
518ec727ea7Spatrick        fail_path = os.path.join(plist_path, "failures")
519ec727ea7Spatrick        os.makedirs(fail_path)
520ec727ea7Spatrick
521ec727ea7Spatrick        execution_time = 0.0
522ec727ea7Spatrick        peak_memory = 0
523ec727ea7Spatrick
524ec727ea7Spatrick        for full_file_name in glob.glob(directory + "/*"):
525ec727ea7Spatrick            file_name = os.path.basename(full_file_name)
526ec727ea7Spatrick            failed = False
527ec727ea7Spatrick
528ec727ea7Spatrick            # Only run the analyzes on supported files.
529ec727ea7Spatrick            if utils.has_no_extension(file_name):
530ec727ea7Spatrick                continue
531ec727ea7Spatrick            if not utils.is_valid_single_input_file(file_name):
532ec727ea7Spatrick                stderr(f"Error: Invalid single input file {full_file_name}.\n")
533ec727ea7Spatrick                raise Exception()
534ec727ea7Spatrick
535ec727ea7Spatrick            # Build and call the analyzer command.
536ec727ea7Spatrick            plist_basename = os.path.join(plist_path, file_name)
537ec727ea7Spatrick            output_option = f"-o '{plist_basename}.plist' "
538ec727ea7Spatrick            command = f"{prefix}{output_option}'{file_name}'"
539ec727ea7Spatrick
540ec727ea7Spatrick            log_path = os.path.join(fail_path, file_name + ".stderr.txt")
541ec727ea7Spatrick            with open(log_path, "w+") as log_file:
542ec727ea7Spatrick                try:
543ec727ea7Spatrick                    self.vout(f"  Executing: {command}\n")
544ec727ea7Spatrick
545ec727ea7Spatrick                    time, mem = utils.check_and_measure_call(
546ec727ea7Spatrick                        command, cwd=directory, stderr=log_file,
547ec727ea7Spatrick                        stdout=log_file, shell=True)
548ec727ea7Spatrick
549ec727ea7Spatrick                    execution_time += time
550ec727ea7Spatrick                    peak_memory = max(peak_memory, mem)
551ec727ea7Spatrick
552ec727ea7Spatrick                except CalledProcessError as e:
553ec727ea7Spatrick                    stderr(f"Error: Analyzes of {full_file_name} failed. "
554ec727ea7Spatrick                           f"See {log_file.name} for details. "
555ec727ea7Spatrick                           f"Error code {e.returncode}.\n")
556ec727ea7Spatrick                    failed = True
557ec727ea7Spatrick
558ec727ea7Spatrick                # If command did not fail, erase the log file.
559ec727ea7Spatrick                if not failed:
560ec727ea7Spatrick                    os.remove(log_file.name)
561ec727ea7Spatrick
562ec727ea7Spatrick        return execution_time, peak_memory
563ec727ea7Spatrick
564ec727ea7Spatrick    def generate_config(self) -> str:
565ec727ea7Spatrick        out = "serialize-stats=true,stable-report-filename=true"
566ec727ea7Spatrick
567ec727ea7Spatrick        if self.extra_analyzer_config:
568ec727ea7Spatrick            out += "," + self.extra_analyzer_config
569ec727ea7Spatrick
570ec727ea7Spatrick        return out
571ec727ea7Spatrick
572ec727ea7Spatrick    def _download_and_patch(self, directory: str, build_log_file: IO):
573ec727ea7Spatrick        """
574ec727ea7Spatrick        Download the project and apply the local patchfile if it exists.
575ec727ea7Spatrick        """
576ec727ea7Spatrick        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
577ec727ea7Spatrick
578ec727ea7Spatrick        # If the we don't already have the cached source, run the project's
579ec727ea7Spatrick        # download script to download it.
580ec727ea7Spatrick        if not os.path.exists(cached_source):
581ec727ea7Spatrick            self._download(directory, build_log_file)
582ec727ea7Spatrick            if not os.path.exists(cached_source):
583ec727ea7Spatrick                stderr(f"Error: '{cached_source}' not found after download.\n")
584ec727ea7Spatrick                exit(1)
585ec727ea7Spatrick
586ec727ea7Spatrick        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
587ec727ea7Spatrick
588ec727ea7Spatrick        # Remove potentially stale patched source.
589ec727ea7Spatrick        if os.path.exists(patched_source):
590ec727ea7Spatrick            shutil.rmtree(patched_source)
591ec727ea7Spatrick
592ec727ea7Spatrick        # Copy the cached source and apply any patches to the copy.
593ec727ea7Spatrick        shutil.copytree(cached_source, patched_source, symlinks=True)
594ec727ea7Spatrick        self._apply_patch(directory, build_log_file)
595ec727ea7Spatrick
596ec727ea7Spatrick    def _download(self, directory: str, build_log_file: IO):
597ec727ea7Spatrick        """
598ec727ea7Spatrick        Run the script to download the project, if it exists.
599ec727ea7Spatrick        """
600ec727ea7Spatrick        if self.project.source == DownloadType.GIT:
601ec727ea7Spatrick            self._download_from_git(directory, build_log_file)
602ec727ea7Spatrick        elif self.project.source == DownloadType.ZIP:
603ec727ea7Spatrick            self._unpack_zip(directory, build_log_file)
604ec727ea7Spatrick        elif self.project.source == DownloadType.SCRIPT:
605ec727ea7Spatrick            self._run_download_script(directory, build_log_file)
606ec727ea7Spatrick        else:
607ec727ea7Spatrick            raise ValueError(
608ec727ea7Spatrick                f"Unknown source type '{self.project.source}' is found "
609ec727ea7Spatrick                f"for the '{self.project.name}' project")
610ec727ea7Spatrick
611ec727ea7Spatrick    def _download_from_git(self, directory: str, build_log_file: IO):
612ec727ea7Spatrick        repo = self.project.origin
613ec727ea7Spatrick        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)
614ec727ea7Spatrick
615ec727ea7Spatrick        check_call(f"git clone --recursive {repo} {cached_source}",
616ec727ea7Spatrick                   cwd=directory, stderr=build_log_file,
617ec727ea7Spatrick                   stdout=build_log_file, shell=True)
618ec727ea7Spatrick        check_call(f"git checkout --quiet {self.project.commit}",
619ec727ea7Spatrick                   cwd=cached_source, stderr=build_log_file,
620ec727ea7Spatrick                   stdout=build_log_file, shell=True)
621ec727ea7Spatrick
622ec727ea7Spatrick    def _unpack_zip(self, directory: str, build_log_file: IO):
623ec727ea7Spatrick        zip_files = list(glob.glob(directory + "/*.zip"))
624ec727ea7Spatrick
625ec727ea7Spatrick        if len(zip_files) == 0:
626ec727ea7Spatrick            raise ValueError(
627ec727ea7Spatrick                f"Couldn't find any zip files to unpack for the "
628ec727ea7Spatrick                f"'{self.project.name}' project")
629ec727ea7Spatrick
630ec727ea7Spatrick        if len(zip_files) > 1:
631ec727ea7Spatrick            raise ValueError(
632ec727ea7Spatrick                f"Couldn't decide which of the zip files ({zip_files}) "
633ec727ea7Spatrick                f"for the '{self.project.name}' project to unpack")
634ec727ea7Spatrick
635ec727ea7Spatrick        with zipfile.ZipFile(zip_files[0], "r") as zip_file:
636ec727ea7Spatrick            zip_file.extractall(os.path.join(directory,
637ec727ea7Spatrick                                             CACHED_SOURCE_DIR_NAME))
638ec727ea7Spatrick
639ec727ea7Spatrick    @staticmethod
640ec727ea7Spatrick    def _run_download_script(directory: str, build_log_file: IO):
641ec727ea7Spatrick        script_path = os.path.join(directory, DOWNLOAD_SCRIPT)
642ec727ea7Spatrick        utils.run_script(script_path, build_log_file, directory,
643ec727ea7Spatrick                         out=LOCAL.stdout, err=LOCAL.stderr,
644ec727ea7Spatrick                         verbose=VERBOSE)
645ec727ea7Spatrick
646ec727ea7Spatrick    def _apply_patch(self, directory: str, build_log_file: IO):
647ec727ea7Spatrick        patchfile_path = os.path.join(directory, PATCHFILE_NAME)
648ec727ea7Spatrick        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
649ec727ea7Spatrick
650ec727ea7Spatrick        if not os.path.exists(patchfile_path):
651ec727ea7Spatrick            self.out("  No local patches.\n")
652ec727ea7Spatrick            return
653ec727ea7Spatrick
654ec727ea7Spatrick        self.out("  Applying patch.\n")
655ec727ea7Spatrick        try:
656ec727ea7Spatrick            check_call(f"patch -p1 < '{patchfile_path}'",
657ec727ea7Spatrick                       cwd=patched_source,
658ec727ea7Spatrick                       stderr=build_log_file,
659ec727ea7Spatrick                       stdout=build_log_file,
660ec727ea7Spatrick                       shell=True)
661ec727ea7Spatrick
662ec727ea7Spatrick        except CalledProcessError:
663ec727ea7Spatrick            stderr(f"Error: Patch failed. "
664ec727ea7Spatrick                   f"See {build_log_file.name} for details.\n")
665ec727ea7Spatrick            sys.exit(1)
666ec727ea7Spatrick
667ec727ea7Spatrick    def out(self, what: str):
668ec727ea7Spatrick        if not self.silent:
669ec727ea7Spatrick            stdout(what)
670ec727ea7Spatrick
671ec727ea7Spatrick    def vout(self, what: str):
672ec727ea7Spatrick        if VERBOSE >= 1:
673ec727ea7Spatrick            self.out(what)
674e5dd7070Spatrick
675e5dd7070Spatrick
class TestProjectThread(threading.Thread):
    """
    Worker thread that takes test tasks off a shared queue and runs a
    ProjectTester for each of them until the queue is drained.
    """

    def __init__(self, tasks_queue: TestQueue,
                 results_differ: threading.Event,
                 failure_flag: threading.Event):
        """
        :param tasks_queue: Shared queue of test tasks to process.
        :param results_differ: Used to signify that results differ from
               the canonical ones.
        :param failure_flag: Used to signify a failure during the run.
        """
        self.tasks_queue = tasks_queue
        self.results_differ = results_differ
        self.failure_flag = failure_flag
        super().__init__()

        # Needed to gracefully handle interrupts with Ctrl-C
        self.daemon = True

    def run(self):
        """
        Process tasks until the queue is empty.

        Sets ``results_differ`` when a tester reports failure, and sets
        ``failure_flag`` (then re-raises) when processing a task raises.
        """
        # Imported locally: only this method needs the exception type.
        from queue import Empty

        while True:
            # Checking empty() and then calling a blocking get() is racy:
            # another worker may take the last task in between, leaving this
            # thread blocked forever. A non-blocking get avoids that.
            try:
                test_info = self.tasks_queue.get_nowait()
            except Empty:
                return

            try:
                init_logger(test_info.project.name)

                tester = ProjectTester(test_info)
                if not tester.test():
                    self.results_differ.set()

                self.tasks_queue.task_done()

            except BaseException:
                self.failure_flag.set()
                raise
708ec727ea7Spatrick
709ec727ea7Spatrick
710ec727ea7Spatrick###############################################################################
711ec727ea7Spatrick# Utility functions.
712ec727ea7Spatrick###############################################################################
713ec727ea7Spatrick
714ec727ea7Spatrick
def check_build(output_dir: str):
    """
    Given the scan-build output directory, check whether the analysis failed
    (by searching for the failures directories).

    On success, empty plists and empty folders are cleaned up and the number
    of remaining plist files is reported. On failure, a summary with up to
    NUM_OF_FAILURES_IN_SUMMARY failure logs is printed and the process exits
    with status 1.

    :param output_dir: The scan-build output directory to inspect.
    """
    # Check if there are failures.
    failures = glob.glob(output_dir + "/*/failures/*.stderr.txt")
    total_failed = len(failures)

    if total_failed == 0:
        clean_up_empty_plists(output_dir)
        clean_up_empty_folders(output_dir)

        plists = glob.glob(output_dir + "/*/*.plist")
        stdout(f"Number of bug reports "
               f"(non-empty plist files) produced: {len(plists)}\n")
        return

    stderr("Error: analysis failed.\n")
    stderr(f"Total of {total_failed} failures discovered.\n")

    summary_truncated = total_failed > NUM_OF_FAILURES_IN_SUMMARY
    if summary_truncated:
        stderr(f"See the first {NUM_OF_FAILURES_IN_SUMMARY} below.\n")

    # Show exactly the first NUM_OF_FAILURES_IN_SUMMARY logs. Breaking on
    # "index >= limit" with 1-based indices would drop the last one.
    shown_failures = failures[:NUM_OF_FAILURES_IN_SUMMARY]
    for index, failed_log_path in enumerate(shown_failures, start=1):
        stderr(f"\n-- Error #{index} -----------\n")

        with open(failed_log_path, "r") as failed_log:
            shutil.copyfileobj(failed_log, LOCAL.stdout)

    if summary_truncated:
        stderr("See the results folder for more.")

    sys.exit(1)
754ec727ea7Spatrick
755ec727ea7Spatrick
def cleanup_reference_results(output_dir: str):
    """
    Delete html, css, and js files from reference results. These can
    include multiple copies of the benchmark source and so get very large.
    The build log is removed as well.

    :param output_dir: Reference results directory to clean up.
    """
    extensions = ["html", "css", "js"]

    for extension in extensions:
        # glob already yields paths that include output_dir. Joining
        # output_dir onto them again would double the prefix whenever
        # output_dir is a relative path.
        for file_to_rm in glob.glob(f"{output_dir}/*/*.{extension}"):
            os.remove(file_to_rm)

    # Remove the log file. It leaks absolute path names.
    remove_log_file(output_dir)
770ec727ea7Spatrick
771ec727ea7Spatrick
def run_cmp_results(directory: str, strictness: int = 0) -> bool:
    """
    Compare the warnings produced by scan-build.
    strictness defines the success criteria for the test:
      0 - success if there are no crashes or analyzer failure.
      1 - success if there are no difference in the number of reported bugs.
      2 - success if all the bug reports are identical.

    Exits the process with status 1 if the reference and the new results
    contain a different number of result folders.

    :param directory: Project directory holding both the reference and the
           new results.
    :param strictness: Success criteria, see above.
    :return success: Whether tests pass according to the strictness
    criteria.
    """
    tests_passed = True
    start_time = time.time()

    ref_dir = os.path.join(directory, REF_PREFIX + OUTPUT_DIR_NAME)
    new_dir = os.path.join(directory, OUTPUT_DIR_NAME)

    # We have to go one level down the directory tree.
    ref_list = glob.glob(ref_dir + "/*")
    new_list = glob.glob(new_dir + "/*")

    # Log folders are also located in the results dir, so ignore them.
    # Both removals are guarded: list.remove raises ValueError when the
    # entry is absent, which would abort the comparison for no good reason.
    ref_log_dir = os.path.join(ref_dir, LOG_DIR_NAME)
    if ref_log_dir in ref_list:
        ref_list.remove(ref_log_dir)
    new_log_dir = os.path.join(new_dir, LOG_DIR_NAME)
    if new_log_dir in new_list:
        new_list.remove(new_log_dir)

    if len(ref_list) != len(new_list):
        stderr(f"Mismatch in number of results folders: "
               f"{ref_list} vs {new_list}")
        sys.exit(1)

    # There might be more than one folder underneath - one per each
    # scan-build command (Ex: one for configure and one for make).
    if len(ref_list) > 1:
        # Assume that the corresponding folders have the same names.
        ref_list.sort()
        new_list.sort()

    patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

    # Iterate and find the differences.
    for ref_subdir, new_subdir in zip(ref_list, new_list):
        assert(ref_subdir != new_subdir)

        if VERBOSE >= 1:
            stdout(f"  Comparing Results: {ref_subdir} {new_subdir}\n")

        ref_results = CmpRuns.ResultsDirectory(ref_subdir)
        new_results = CmpRuns.ResultsDirectory(new_subdir, patched_source)

        # Scan the results and dump the differences; empty plist files are
        # kept (delete_empty=False).
        num_diffs, reports_in_ref, reports_in_new = \
            CmpRuns.dump_scan_build_results_diff(ref_results, new_results,
                                                 delete_empty=False,
                                                 out=LOCAL.stdout)

        if num_diffs > 0:
            stdout(f"Warning: {num_diffs} differences in diagnostics.\n")

        if strictness >= 2 and num_diffs > 0:
            stdout("Error: Diffs found in strict mode (2).\n")
            tests_passed = False

        elif strictness >= 1 and reports_in_ref != reports_in_new:
            stdout("Error: The number of results are different "
                   " strict mode (1).\n")
            tests_passed = False

    stdout(f"Diagnostic comparison complete "
           f"(time: {time.time() - start_time:.2f}).\n")

    return tests_passed
846ec727ea7Spatrick
847ec727ea7Spatrick
def _strip_path_prefix(source: str, prefix: str) -> str:
    """
    Return ``source`` relative to ``prefix`` if it lies underneath it,
    otherwise return ``source`` unchanged.

    ``prefix`` must not end with a path separator. The match is only
    accepted on a path-component boundary, so "/a/proj2/x.c" is not treated
    as living under "/a/proj".
    """
    remainder = source[len(prefix):]
    if source.startswith(prefix) and remainder[:1] in ("/", os.sep):
        return remainder[1:]
    return source


def normalize_reference_results(directory: str, output_dir: str,
                                build_mode: int):
    """
    Make the absolute paths relative in the reference results and drop
    fields which change from run to run.

    Every ``*plist`` file under ``output_dir`` is rewritten in place.

    :param directory: Project directory; source paths underneath it are
           made relative.
    :param output_dir: Directory tree containing the plist files.
    :param build_mode: When equal to 1, paths are made relative to the
           patched source directory inside ``directory`` instead.
    """
    # The prefix does not depend on the plist being processed, so compute
    # it once up front.
    path_prefix = directory
    if build_mode == 1:
        path_prefix = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

    # Drop trailing separators so that exactly one separator is consumed
    # when the prefix is stripped.
    path_prefix = path_prefix.rstrip("/" + os.sep)

    for dir_path, _, filenames in os.walk(output_dir):
        for filename in filenames:
            if not filename.endswith('plist'):
                continue

            plist = os.path.join(dir_path, filename)
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)

            data['files'] = [_strip_path_prefix(source, path_prefix)
                             for source in data['files']]

            # Remove transient fields which change from run to run.
            for diagnostic in data['diagnostics']:
                if 'HTMLDiagnostics_files' in diagnostic:
                    diagnostic.pop('HTMLDiagnostics_files')

            if 'clang_version' in data:
                data.pop('clang_version')

            with open(plist, "wb") as plist_file:
                plistlib.dump(data, plist_file)
881ec727ea7Spatrick
882ec727ea7Spatrick
def get_build_log_path(output_dir: str) -> str:
    """Return the path of the build log inside ``output_dir``'s log dir."""
    log_dir = os.path.join(output_dir, LOG_DIR_NAME)
    return os.path.join(log_dir, BUILD_LOG_NAME)
885ec727ea7Spatrick
886ec727ea7Spatrick
def remove_log_file(output_dir: str):
    """
    Delete the build log belonging to ``output_dir``, if it exists.

    :param output_dir: Results directory whose build log should be removed.
    """
    log_path = get_build_log_path(output_dir)

    # Nothing to clean up when the log was never written.
    if not os.path.exists(log_path):
        return

    if VERBOSE >= 1:
        stdout(f"  Removing log file: {log_path}\n")

    os.remove(log_path)
896ec727ea7Spatrick
897ec727ea7Spatrick
def clean_up_empty_plists(output_dir: str):
    """
    A plist file is created for each call to the analyzer (each source file).
    We are only interested in the ones that have bug reports,
    so delete the rest.

    Plist files that cannot be parsed are reported on stderr and left in
    place.

    :param output_dir: Results directory; plists are looked up one level
           below it.
    """
    # glob already yields paths that include output_dir. Joining output_dir
    # onto them again would double the prefix whenever output_dir is a
    # relative path.
    for plist in glob.glob(output_dir + "/*/*.plist"):
        try:
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)

        except InvalidFileException as e:
            stderr(f"Error parsing plist file {plist}: {str(e)}")
            continue

        # Delete empty reports.
        if not data['files']:
            os.remove(plist)
918ec727ea7Spatrick
919ec727ea7Spatrick
def clean_up_empty_folders(output_dir: str):
    """
    Remove empty folders from results, as git would not store them.

    Only the direct children of ``output_dir`` are considered, and
    ``output_dir`` itself is never removed.

    :param output_dir: Results directory whose empty subfolders are removed.
    """
    for subdir in glob.glob(output_dir + "/*"):
        # Skip plain files: os.listdir would raise on them.
        if not os.path.isdir(subdir):
            continue

        if not os.listdir(subdir):
            # os.rmdir, not os.removedirs: the latter also prunes parent
            # directories that become empty, which could delete output_dir
            # (and its ancestors) as well.
            os.rmdir(subdir)
928ec727ea7Spatrick
929ec727ea7Spatrick
if __name__ == "__main__":
    # This module is not a standalone entry point: tell the user which
    # command to run instead and exit with a failure status.
    for message in ("SATestBuild.py should not be used on its own.",
                    "Please use 'SATest.py build' instead"):
        print(message)
    sys.exit(1)
933ec727ea7Spatrick    sys.exit(1)
934