# xref: /llvm-project/clang/utils/analyzer/SATestBuild.py (revision f82fb06cd1276bd358315e45cd3f4312b1319314)
1#!/usr/bin/env python
2
3"""
4Static Analyzer qualification infrastructure.
5
6The goal is to test the analyzer against different projects,
7check for failures, compare results, and measure performance.
8
9Repository Directory will contain sources of the projects as well as the
10information on how to build them and the expected output.
11Repository Directory structure:
12   - ProjectMap file
13   - Historical Performance Data
14   - Project Dir1
15     - ReferenceOutput
16   - Project Dir2
17     - ReferenceOutput
18   ..
19Note that the build tree must be inside the project dir.
20
21To test the build of the analyzer one would:
22   - Copy over a copy of the Repository Directory. (TODO: Prefer to ensure that
23     the build directory does not pollute the repository to min network
24     traffic).
25   - Build all projects, until error. Produce logs to report errors.
26   - Compare results.
27
28The files which should be kept around for failure investigations:
29   RepositoryCopy/Project DirI/ScanBuildResults
30   RepositoryCopy/Project DirI/run_static_analyzer.log
31
32Assumptions (TODO: shouldn't need to assume these.):
33   The script is being run from the Repository Directory.
   The compiler used by scan-build, and scan-build itself, are in the PATH.
35   export PATH=/Users/zaks/workspace/c2llvm/build/Release+Asserts/bin:$PATH
36
For more logging, set the env variables:
38   zaks:TI zaks$ export CCC_ANALYZER_LOG=1
39   zaks:TI zaks$ export CCC_ANALYZER_VERBOSE=1
40
41The list of checkers tested are hardcoded in the Checkers variable.
42For testing additional checkers, use the SA_ADDITIONAL_CHECKERS environment
43variable. It should contain a comma separated list.
44"""
45import CmpRuns
46import SATestUtils as utils
47from ProjectMap import DownloadType, ProjectInfo
48
49import glob
50import logging
51import math
52import multiprocessing
53import os
54import plistlib
55import shutil
56import sys
57import threading
58import time
59import zipfile
60
61from queue import Queue
62
63# mypy has problems finding InvalidFileException in the module
# and this is how we can shush that false positive
65from plistlib import InvalidFileException  # type:ignore
66from subprocess import CalledProcessError, check_call
67from typing import Dict, IO, List, NamedTuple, Optional, TYPE_CHECKING, Tuple
68
69
70###############################################################################
71# Helper functions.
72###############################################################################
73
74
class StreamToLogger:
    """A minimal file-like object that forwards every write to a logger."""

    def __init__(self, logger: logging.Logger, log_level: int = logging.INFO):
        self.logger = logger
        self.log_level = log_level

    def write(self, message: str):
        # Strip trailing whitespace so the logger does not emit a blank line.
        self.logger.log(self.log_level, message.rstrip())

    def flush(self):
        # Nothing is buffered here, so there is nothing to flush.
        pass

    def fileno(self) -> int:
        # Return a fixed descriptor for callers that require a fileno().
        return 0
89
90
91LOCAL = threading.local()
92
93
def init_logger(name: str):
    """Route this thread's stdout/stderr writes through a logger *name*."""
    # TODO: use debug levels for VERBOSE messages
    thread_logger = logging.getLogger(name)
    thread_logger.setLevel(logging.DEBUG)
    LOCAL.stdout = StreamToLogger(thread_logger, logging.INFO)
    LOCAL.stderr = StreamToLogger(thread_logger, logging.ERROR)
100
101
102init_logger("main")
103
104
def stderr(message: str):
    """Send *message* to the current thread's error logger stream."""
    LOCAL.stderr.write(message)
107
108
def stdout(message: str):
    """Send *message* to the current thread's output logger stream."""
    LOCAL.stdout.write(message)
111
112
113logging.basicConfig(format="%(asctime)s:%(levelname)s:%(name)s: %(message)s")
114
115
116###############################################################################
117# Configuration setup.
118###############################################################################
119
120
121# Find Clang for static analysis.
122if "CC" in os.environ:
123    cc_candidate: Optional[str] = os.environ["CC"]
124else:
125    cc_candidate = utils.which("clang", os.environ["PATH"])
126if not cc_candidate:
127    stderr("Error: cannot find 'clang' in PATH")
128    sys.exit(1)
129
130CLANG = cc_candidate
131
132# Number of jobs.
133MAX_JOBS = int(math.ceil(multiprocessing.cpu_count() * 0.75))
134
135# Names of the project specific scripts.
136# The script that downloads the project.
137DOWNLOAD_SCRIPT = "download_project.sh"
138# The script that needs to be executed before the build can start.
139CLEANUP_SCRIPT = "cleanup_run_static_analyzer.sh"
140# This is a file containing commands for scan-build.
141BUILD_SCRIPT = "run_static_analyzer.cmd"
142
143# A comment in a build script which disables wrapping.
144NO_PREFIX_CMD = "#NOPREFIX"
145
146# The log file name.
147LOG_DIR_NAME = "Logs"
148BUILD_LOG_NAME = "run_static_analyzer.log"
149# Summary file - contains the summary of the failures. Ex: This info can be be
150# displayed when buildbot detects a build failure.
151NUM_OF_FAILURES_IN_SUMMARY = 10
152
153# The scan-build result directory.
154OUTPUT_DIR_NAME = "ScanBuildResults"
155REF_PREFIX = "Ref"
156
157# The name of the directory storing the cached project source. If this
158# directory does not exist, the download script will be executed.
159# That script should create the "CachedSource" directory and download the
160# project source into it.
161CACHED_SOURCE_DIR_NAME = "CachedSource"
162
163# The name of the directory containing the source code that will be analyzed.
164# Each time a project is analyzed, a fresh copy of its CachedSource directory
165# will be copied to the PatchedSource directory and then the local patches
166# in PATCHFILE_NAME will be applied (if PATCHFILE_NAME exists).
167PATCHED_SOURCE_DIR_NAME = "PatchedSource"
168
169# The name of the patchfile specifying any changes that should be applied
170# to the CachedSource before analyzing.
171PATCHFILE_NAME = "changes_for_analyzer.patch"
172
173# The list of checkers used during analyzes.
174# Currently, consists of all the non-experimental checkers, plus a few alpha
175# checkers we don't want to regress on.
176CHECKERS = ",".join(
177    [
178        "alpha.unix.SimpleStream",
179        "optin.taint",
180        "cplusplus.NewDeleteLeaks",
181        "core",
182        "cplusplus",
183        "deadcode",
184        "security",
185        "unix",
186        "osx",
187        "nullability",
188    ]
189)
190
191VERBOSE = 0
192
193
194###############################################################################
195# Test harness logic.
196###############################################################################
197
198
def run_cleanup_script(directory: str, build_log_file: IO):
    """
    Run the project's pre-processing script, if any, from within the
    patched source tree.
    """
    utils.run_script(
        os.path.join(directory, CLEANUP_SCRIPT),
        build_log_file,
        os.path.join(directory, PATCHED_SOURCE_DIR_NAME),
        out=LOCAL.stdout,
        err=LOCAL.stderr,
        verbose=VERBOSE,
    )
214
215
class TestInfo(NamedTuple):
    """
    Information about a project and settings for its analysis.
    """

    # Static description of the project (name, mode, source origin, ...).
    project: ProjectInfo
    # When True, pass --override-compiler to scan-build.
    override_compiler: bool = False
    # Extra options appended to the -analyzer-config value.
    extra_analyzer_config: str = ""
    # Extra checkers appended to the -enable-checker list.
    extra_checkers: str = ""
    # When True, results are normalized and kept as the new reference
    # (stored under the Ref-prefixed output directory).
    is_reference_build: bool = False
    # Success criteria level; see run_cmp_results for the exact meaning.
    strictness: int = 0
227
228
229# typing package doesn't have a separate type for Queue, but has a generic stub
230# We still want to have a type-safe checked project queue, for this reason,
231# we specify generic type for mypy.
232#
233# It is a common workaround for this situation:
234# https://mypy.readthedocs.io/en/stable/common_issues.html#using-classes-that-are-generic-in-stubs-but-not-at-runtime
if TYPE_CHECKING:
    # Give mypy the precise element type; this branch is never executed.
    TestQueue = Queue[TestInfo]  # this is only processed by mypy
else:
    # At runtime, use the plain (unparameterized) Queue class.
    TestQueue = Queue  # this will be executed at runtime
239
240
class RegressionTester:
    """
    Aggregates analyzer testing over a list of projects, running them
    sequentially or across several worker threads.
    """

    def __init__(
        self,
        jobs: int,
        projects: List[ProjectInfo],
        override_compiler: bool,
        extra_analyzer_config: str,
        extra_checkers: str,
        regenerate: bool,
        strictness: bool,
    ):
        self.jobs = jobs
        self.projects = projects
        self.override_compiler = override_compiler
        self.extra_analyzer_config = extra_analyzer_config
        self.extra_checkers = extra_checkers
        self.regenerate = regenerate
        self.strictness = strictness

    def test_all(self) -> bool:
        """
        Test every configured project.
        :return: whether all tests have passed.
        """
        # Bundle per-project settings into TestInfo records.
        test_plan = [
            TestInfo(
                project,
                self.override_compiler,
                self.extra_analyzer_config,
                self.extra_checkers,
                self.regenerate,
                self.strictness,
            )
            for project in self.projects
        ]

        if self.jobs <= 1:
            return self._single_threaded_test_all(test_plan)
        return self._multi_threaded_test_all(test_plan)

    def _single_threaded_test_all(self, projects_to_test: List[TestInfo]) -> bool:
        """
        Run all projects one after another.
        :return: whether tests have passed.
        """
        # Materialize the list first: every project must run even after a
        # failure, so all results get reported.
        outcomes = [ProjectTester(info).test() for info in projects_to_test]
        return all(outcomes)

    def _multi_threaded_test_all(self, projects_to_test: List[TestInfo]) -> bool:
        """
        Run each project in a separate thread.

        This is OK despite GIL, as testing is blocked
        on launching external processes.

        :return: whether tests have passed.
        """
        pending = TestQueue()
        for info in projects_to_test:
            pending.put(info)

        results_differ = threading.Event()
        failure_flag = threading.Event()

        for _ in range(self.jobs):
            worker = TestProjectThread(pending, results_differ, failure_flag)
            worker.start()

        # Poll instead of joining, which keeps Ctrl-C responsive.
        while pending.unfinished_tasks:
            time.sleep(0.1)  # Seconds.
            if failure_flag.is_set():
                stderr("Test runner crashed\n")
                sys.exit(1)
        return not results_differ.is_set()
323
324
class ProjectTester:
    """
    A component aggregating testing for one project.
    """

    def __init__(self, test_info: TestInfo, silent: bool = False):
        self.project = test_info.project
        self.override_compiler = test_info.override_compiler
        self.extra_analyzer_config = test_info.extra_analyzer_config
        self.extra_checkers = test_info.extra_checkers
        self.is_reference_build = test_info.is_reference_build
        self.strictness = test_info.strictness
        # When silent, out() swallows all normal progress messages.
        self.silent = silent

    def test(self) -> bool:
        """
        Test a given project: build/analyze it, sanity-check the results,
        then either store them as the reference or compare against it.
        :return tests_passed: Whether tests have passed according
        to the :param strictness: criteria.
        """
        if not self.project.enabled:
            self.out(f" \n\n--- Skipping disabled project {self.project.name}\n")
            return True

        self.out(f" \n\n--- Building project {self.project.name}\n")

        start_time = time.time()

        project_dir = self.get_project_dir()
        self.vout(f"  Build directory: {project_dir}.\n")

        # Set the build results directory.
        output_dir = self.get_output_dir()

        self.build(project_dir, output_dir)
        check_build(output_dir)

        if self.is_reference_build:
            cleanup_reference_results(output_dir)
            passed = True
        else:
            passed = run_cmp_results(project_dir, self.strictness)

        self.out(
            f"Completed tests for project {self.project.name} "
            f"(time: {time.time() - start_time:.2f}).\n"
        )

        return passed

    def get_project_dir(self) -> str:
        """Return the absolute path of this project's directory."""
        return os.path.join(os.path.abspath(os.curdir), self.project.name)

    def get_output_dir(self) -> str:
        """Return the results directory (Ref-prefixed for reference builds)."""
        if self.is_reference_build:
            dirname = REF_PREFIX + OUTPUT_DIR_NAME
        else:
            dirname = OUTPUT_DIR_NAME

        return os.path.join(self.get_project_dir(), dirname)

    def build(self, directory: str, output_dir: str) -> Tuple[float, int]:
        """
        Prepare and analyze the project, writing results into output_dir.

        Mode 1 projects are downloaded/patched and built via scan-build;
        all other modes analyze preprocessed files directly.

        :return: (build time in seconds, peak memory used).
        """
        build_log_path = get_build_log_path(output_dir)

        self.out(f"Log file: {build_log_path}\n")
        self.out(f"Output directory: {output_dir}\n")

        remove_log_file(output_dir)

        # Clean up scan build results.
        if os.path.exists(output_dir):
            self.vout(f"  Removing old results: {output_dir}\n")

            shutil.rmtree(output_dir)

        assert not os.path.exists(output_dir)
        os.makedirs(os.path.join(output_dir, LOG_DIR_NAME))

        # Build and analyze the project.
        with open(build_log_path, "w+") as build_log_file:
            if self.project.mode == 1:
                self._download_and_patch(directory, build_log_file)
                run_cleanup_script(directory, build_log_file)
                build_time, memory = self.scan_build(
                    directory, output_dir, build_log_file
                )
            else:
                build_time, memory = self.analyze_preprocessed(directory, output_dir)

            if self.is_reference_build:
                run_cleanup_script(directory, build_log_file)
                normalize_reference_results(directory, output_dir, self.project.mode)

        self.out(
            f"Build complete (time: {utils.time_to_str(build_time)}, "
            f"peak memory: {utils.memory_to_str(memory)}). "
            f"See the log for more details: {build_log_path}\n"
        )

        return build_time, memory

    def scan_build(
        self, directory: str, output_dir: str, build_log_file: IO
    ) -> Tuple[float, int]:
        """
        Build the project with scan-build by reading in the commands and
        prefixing them with the scan-build options.

        :return: (total execution time, peak memory used).
        """
        build_script_path = os.path.join(directory, BUILD_SCRIPT)
        if not os.path.exists(build_script_path):
            stderr(f"Error: build script is not defined: " f"{build_script_path}\n")
            sys.exit(1)

        all_checkers = CHECKERS
        if "SA_ADDITIONAL_CHECKERS" in os.environ:
            all_checkers = all_checkers + "," + os.environ["SA_ADDITIONAL_CHECKERS"]
        if self.extra_checkers != "":
            all_checkers += "," + self.extra_checkers

        # Run scan-build from within the patched source directory.
        cwd = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        options = f"--use-analyzer '{CLANG}' "
        options += f"-plist-html -o '{output_dir}' "
        options += f"-enable-checker {all_checkers} "
        options += "--keep-empty "
        options += f"-analyzer-config '{self.generate_config()}' "

        if self.override_compiler:
            options += "--override-compiler "

        extra_env: Dict[str, str] = {}

        execution_time = 0.0
        peak_memory = 0

        try:
            command_prefix = "scan-build " + options + " "

            # FIX: open the build script in a context manager; the previous
            # code never closed the file handle.
            with open(build_script_path, "r") as command_file:
                for command in command_file:
                    command = command.strip()

                    if len(command) == 0:
                        continue

                    # Custom analyzer invocation specified by project.
                    # Communicate required information using environment
                    # variables instead.
                    if command == NO_PREFIX_CMD:
                        command_prefix = ""
                        extra_env["OUTPUT"] = output_dir
                        extra_env["CC"] = CLANG
                        extra_env["ANALYZER_CONFIG"] = self.generate_config()
                        continue

                    if command.startswith("#"):
                        continue

                    # If using 'make', auto imply a -jX argument
                    # to speed up analysis.  xcodebuild will
                    # automatically use the maximum number of cores.
                    if (
                        command.startswith("make ") or command == "make"
                    ) and "-j" not in command:
                        command += f" -j{MAX_JOBS}"

                    command_to_run = command_prefix + command

                    self.vout(f"  Executing: {command_to_run}\n")

                    # Renamed from 'time' to avoid shadowing the time module.
                    elapsed, mem = utils.check_and_measure_call(
                        command_to_run,
                        cwd=cwd,
                        stderr=build_log_file,
                        stdout=build_log_file,
                        env=dict(os.environ, **extra_env),
                        shell=True,
                    )

                    execution_time += elapsed
                    peak_memory = max(peak_memory, mem)

        except CalledProcessError:
            stderr("Error: scan-build failed. Its output was: \n")
            build_log_file.seek(0)
            shutil.copyfileobj(build_log_file, LOCAL.stderr)
            sys.exit(1)

        return execution_time, peak_memory

    def analyze_preprocessed(
        self, directory: str, output_dir: str
    ) -> Tuple[float, int]:
        """
        Run analysis on a set of preprocessed files.

        :return: (total execution time, peak memory used).
        """
        if os.path.exists(os.path.join(directory, BUILD_SCRIPT)):
            stderr(
                f"Error: The preprocessed files project "
                f"should not contain {BUILD_SCRIPT}\n"
            )
            raise Exception()

        prefix = CLANG + " --analyze "

        prefix += "--analyzer-output plist "
        prefix += " -Xclang -analyzer-checker=" + CHECKERS
        prefix += " -fcxx-exceptions -fblocks "
        prefix += " -Xclang -analyzer-config "
        prefix += f"-Xclang {self.generate_config()} "

        if self.project.mode == 2:
            prefix += "-std=c++11 "

        plist_path = os.path.join(directory, output_dir, "date")
        fail_path = os.path.join(plist_path, "failures")
        os.makedirs(fail_path)

        execution_time = 0.0
        peak_memory = 0

        for full_file_name in glob.glob(directory + "/*"):
            file_name = os.path.basename(full_file_name)
            failed = False

            # Only run the analyzes on supported files.
            if utils.has_no_extension(file_name):
                continue
            if not utils.is_valid_single_input_file(file_name):
                stderr(f"Error: Invalid single input file {full_file_name}.\n")
                raise Exception()

            # Build and call the analyzer command.
            plist_basename = os.path.join(plist_path, file_name)
            output_option = f"-o '{plist_basename}.plist' "
            command = f"{prefix}{output_option}'{file_name}'"

            log_path = os.path.join(fail_path, file_name + ".stderr.txt")
            with open(log_path, "w+") as log_file:
                try:
                    self.vout(f"  Executing: {command}\n")

                    # Renamed from 'time' to avoid shadowing the time module.
                    elapsed, mem = utils.check_and_measure_call(
                        command,
                        cwd=directory,
                        stderr=log_file,
                        stdout=log_file,
                        shell=True,
                    )

                    execution_time += elapsed
                    peak_memory = max(peak_memory, mem)

                except CalledProcessError as e:
                    stderr(
                        f"Error: Analyzes of {full_file_name} failed. "
                        f"See {log_file.name} for details. "
                        f"Error code {e.returncode}.\n"
                    )
                    failed = True

                # If command did not fail, erase the log file.
                if not failed:
                    os.remove(log_file.name)

        return execution_time, peak_memory

    def generate_config(self) -> str:
        """Build the value passed to -analyzer-config for this run."""
        out = "serialize-stats=true,stable-report-filename=true"

        if self.extra_analyzer_config:
            out += "," + self.extra_analyzer_config

        return out

    def _download_and_patch(self, directory: str, build_log_file: IO):
        """
        Download the project and apply the local patchfile if it exists.
        """
        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)

        # If we don't already have the cached source, run the project's
        # download script to download it.
        if not os.path.exists(cached_source):
            self._download(directory, build_log_file)
            if not os.path.exists(cached_source):
                stderr(f"Error: '{cached_source}' not found after download.\n")
                # FIX: sys.exit instead of bare exit() -- consistent with the
                # rest of the file and safe when site builtins are absent.
                sys.exit(1)

        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        # Remove potentially stale patched source.
        if os.path.exists(patched_source):
            shutil.rmtree(patched_source)

        # Copy the cached source and apply any patches to the copy.
        shutil.copytree(cached_source, patched_source, symlinks=True)
        self._apply_patch(directory, build_log_file)

    def _download(self, directory: str, build_log_file: IO):
        """
        Download the project from its configured source type.
        """
        if self.project.source == DownloadType.GIT:
            self._download_from_git(directory, build_log_file)
        elif self.project.source == DownloadType.ZIP:
            self._unpack_zip(directory, build_log_file)
        elif self.project.source == DownloadType.SCRIPT:
            self._run_download_script(directory, build_log_file)
        else:
            raise ValueError(
                f"Unknown source type '{self.project.source}' is found "
                f"for the '{self.project.name}' project"
            )

    def _download_from_git(self, directory: str, build_log_file: IO):
        """Clone the project's repository and check out the pinned commit."""
        repo = self.project.origin
        cached_source = os.path.join(directory, CACHED_SOURCE_DIR_NAME)

        check_call(
            f"git clone --recursive {repo} {cached_source}",
            cwd=directory,
            stderr=build_log_file,
            stdout=build_log_file,
            shell=True,
        )
        check_call(
            f"git checkout --quiet {self.project.commit}",
            cwd=cached_source,
            stderr=build_log_file,
            stdout=build_log_file,
            shell=True,
        )

    def _unpack_zip(self, directory: str, build_log_file: IO):
        """Extract the project's single zip archive into the cache dir."""
        zip_files = list(glob.glob(directory + "/*.zip"))

        if len(zip_files) == 0:
            raise ValueError(
                f"Couldn't find any zip files to unpack for the "
                f"'{self.project.name}' project"
            )

        if len(zip_files) > 1:
            raise ValueError(
                f"Couldn't decide which of the zip files ({zip_files}) "
                f"for the '{self.project.name}' project to unpack"
            )

        with zipfile.ZipFile(zip_files[0], "r") as zip_file:
            zip_file.extractall(os.path.join(directory, CACHED_SOURCE_DIR_NAME))

    @staticmethod
    def _run_download_script(directory: str, build_log_file: IO):
        """Run the project-provided download script."""
        script_path = os.path.join(directory, DOWNLOAD_SCRIPT)
        utils.run_script(
            script_path,
            build_log_file,
            directory,
            out=LOCAL.stdout,
            err=LOCAL.stderr,
            verbose=VERBOSE,
        )

    def _apply_patch(self, directory: str, build_log_file: IO):
        """Apply the local patchfile to the patched source tree, if present."""
        patchfile_path = os.path.join(directory, PATCHFILE_NAME)
        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        if not os.path.exists(patchfile_path):
            self.out("  No local patches.\n")
            return

        self.out("  Applying patch.\n")
        try:
            check_call(
                f"patch -p1 < '{patchfile_path}'",
                cwd=patched_source,
                stderr=build_log_file,
                stdout=build_log_file,
                shell=True,
            )

        except CalledProcessError:
            stderr(f"Error: Patch failed. " f"See {build_log_file.name} for details.\n")
            sys.exit(1)

    def out(self, what: str):
        """Print a progress message unless the tester is silent."""
        if not self.silent:
            stdout(what)

    def vout(self, what: str):
        """Print a progress message only in verbose mode."""
        if VERBOSE >= 1:
            self.out(what)
719
720
class TestProjectThread(threading.Thread):
    """Worker thread that drains the project queue and runs each test."""

    def __init__(
        self,
        tasks_queue: TestQueue,
        results_differ: threading.Event,
        failure_flag: threading.Event,
    ):
        """
        :param results_differ: Used to signify that results differ from
               the canonical ones.
        :param failure_flag: Used to signify a failure during the run.
        """
        super().__init__()
        self.tasks_queue = tasks_queue
        self.results_differ = results_differ
        self.failure_flag = failure_flag

        # Daemon threads die with the main thread, which keeps
        # Ctrl-C handling graceful.
        self.daemon = True

    def run(self):
        queue = self.tasks_queue
        while not queue.empty():
            try:
                info = queue.get()
                # Each project logs under its own name.
                init_logger(info.project.name)

                if not ProjectTester(info).test():
                    self.results_differ.set()

                queue.task_done()

            except BaseException:
                # Let the main thread know this worker is going down.
                self.failure_flag.set()
                raise
756
757
758###############################################################################
759# Utility functions.
760###############################################################################
761
762
def check_build(output_dir: str):
    """
    Given the scan-build output directory, checks if the build failed
    (by searching for the failures directories). If there are failures,
    it prints a summary of the first few failure logs and exits; otherwise
    it prunes empty results and reports the number of bug reports produced.
    """
    # Check if there are failures.
    failures = glob.glob(output_dir + "/*/failures/*.stderr.txt")
    total_failed = len(failures)

    if total_failed == 0:
        clean_up_empty_plists(output_dir)
        clean_up_empty_folders(output_dir)

        plists = glob.glob(output_dir + "/*/*.plist")
        stdout(
            f"Number of bug reports "
            f"(non-empty plist files) produced: {len(plists)}\n"
        )
        return

    stderr("Error: analysis failed.\n")
    stderr(f"Total of {total_failed} failures discovered.\n")

    if total_failed > NUM_OF_FAILURES_IN_SUMMARY:
        stderr(f"See the first {NUM_OF_FAILURES_IN_SUMMARY} below.\n")

    for index, failed_log_path in enumerate(failures, start=1):
        # FIX: off-by-one -- the old '>=' check printed only
        # NUM_OF_FAILURES_IN_SUMMARY - 1 failures despite the message above.
        if index > NUM_OF_FAILURES_IN_SUMMARY:
            break

        stderr(f"\n-- Error #{index} -----------\n")

        # NOTE(review): log bodies go to the stdout logger while the headers
        # go to stderr -- confirm this split is intended.
        with open(failed_log_path, "r") as failed_log:
            shutil.copyfileobj(failed_log, LOCAL.stdout)

    if total_failed > NUM_OF_FAILURES_IN_SUMMARY:
        stderr("See the results folder for more.")

    sys.exit(1)
804
805
def cleanup_reference_results(output_dir: str):
    """
    Delete html, css, and js files from reference results. These can
    include multiple copies of the benchmark source and so get very large.
    """
    for extension in ("html", "css", "js"):
        # FIX: glob already returns paths rooted at output_dir; the old code
        # re-joined them with output_dir, which would double the prefix for
        # a relative output_dir (it only worked because paths were absolute).
        for file_to_rm in glob.glob(f"{output_dir}/*/*.{extension}"):
            os.remove(file_to_rm)

    # Remove the log file. It leaks absolute path names.
    remove_log_file(output_dir)
820
821
def run_cmp_results(directory: str, strictness: int = 0) -> bool:
    """
    Compare the warnings produced by scan-build.
    strictness defines the success criteria for the test:
      0 - success if there are no crashes or analyzer failure.
      1 - success if there are no difference in the number of reported bugs.
      2 - success if all the bug reports are identical.

    :return success: Whether tests pass according to the strictness
    criteria.
    """
    tests_passed = True
    start_time = time.time()

    ref_dir = os.path.join(directory, REF_PREFIX + OUTPUT_DIR_NAME)
    new_dir = os.path.join(directory, OUTPUT_DIR_NAME)

    # We have to go one level down the directory tree.
    ref_list = glob.glob(ref_dir + "/*")
    new_list = glob.glob(new_dir + "/*")

    # Log folders are also located in the results dir, so ignore them.
    # FIX: guard both removals -- the old code removed the new-side log dir
    # unconditionally and raised ValueError when it was missing.
    ref_log_dir = os.path.join(ref_dir, LOG_DIR_NAME)
    if ref_log_dir in ref_list:
        ref_list.remove(ref_log_dir)
    new_log_dir = os.path.join(new_dir, LOG_DIR_NAME)
    if new_log_dir in new_list:
        new_list.remove(new_log_dir)

    if len(ref_list) != len(new_list):
        stderr(f"Mismatch in number of results folders: " f"{ref_list} vs {new_list}")
        sys.exit(1)

    # There might be more then one folder underneath - one per each scan-build
    # command (Ex: one for configure and one for make).
    if len(ref_list) > 1:
        # Assume that the corresponding folders have the same names.
        ref_list.sort()
        new_list.sort()

    # Iterate and find the differences.
    num_diffs = 0
    # Renamed loop variables so they don't shadow ref_dir/new_dir above.
    for ref_results_dir, new_results_dir in zip(ref_list, new_list):
        assert ref_results_dir != new_results_dir

        if VERBOSE >= 1:
            stdout(f"  Comparing Results: {ref_results_dir} {new_results_dir}\n")

        patched_source = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)

        ref_results = CmpRuns.ResultsDirectory(ref_results_dir)
        new_results = CmpRuns.ResultsDirectory(new_results_dir, patched_source)

        # Scan the results, delete empty plist files.
        (
            num_diffs,
            reports_in_ref,
            reports_in_new,
        ) = CmpRuns.dump_scan_build_results_diff(
            ref_results, new_results, delete_empty=False, out=LOCAL.stdout
        )

        if num_diffs > 0:
            stdout(f"Warning: {num_diffs} differences in diagnostics.\n")

        if strictness >= 2 and num_diffs > 0:
            stdout("Error: Diffs found in strict mode (2).\n")
            tests_passed = False

        elif strictness >= 1 and reports_in_ref != reports_in_new:
            # FIX: the original message was garbled ("different  strict mode").
            stdout("Error: The number of results are different in strict mode (1).\n")
            tests_passed = False

    stdout(
        f"Diagnostic comparison complete " f"(time: {time.time() - start_time:.2f}).\n"
    )

    return tests_passed
898
899
def normalize_reference_results(directory: str, output_dir: str, build_mode: int):
    """
    Make the absolute paths relative in the reference results.
    """
    # Downloaded projects (mode 1) are analyzed inside the patched source
    # tree, so strip that prefix; otherwise strip the project dir itself.
    if build_mode == 1:
        path_prefix = os.path.join(directory, PATCHED_SOURCE_DIR_NAME)
    else:
        path_prefix = directory

    def relativize(source: str) -> str:
        # Drop the prefix plus the following path separator.
        if source.startswith(path_prefix):
            return source[len(path_prefix) + 1 :]
        return source

    for dir_path, _, filenames in os.walk(output_dir):
        for filename in filenames:
            if not filename.endswith("plist"):
                continue

            plist = os.path.join(dir_path, filename)
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)

            data["files"] = [relativize(source) for source in data["files"]]

            # Remove transient fields which change from run to run.
            for diagnostic in data["diagnostics"]:
                diagnostic.pop("HTMLDiagnostics_files", None)
            data.pop("clang_version", None)

            with open(plist, "wb") as plist_file:
                plistlib.dump(data, plist_file)
935
936
def get_build_log_path(output_dir: str) -> str:
    """Return the path of the build log inside *output_dir*."""
    log_dir = os.path.join(output_dir, LOG_DIR_NAME)
    return os.path.join(log_dir, BUILD_LOG_NAME)
939
940
def remove_log_file(output_dir: str):
    """Delete the build log left by a previous run, if present."""
    build_log_path = get_build_log_path(output_dir)

    # Nothing to clean up.
    if not os.path.exists(build_log_path):
        return

    if VERBOSE >= 1:
        stdout(f"  Removing log file: {build_log_path}\n")

    os.remove(build_log_path)
950
951
def clean_up_empty_plists(output_dir: str):
    """
    A plist file is created for each call to the analyzer (each source file).
    We are only interested in the ones that have bug reports,
    so delete the rest.
    """
    # FIX: glob already returns paths rooted at output_dir; the old code
    # re-joined them with output_dir, which would double the prefix for a
    # relative output_dir (it only worked because paths were absolute).
    for plist in glob.glob(output_dir + "/*/*.plist"):
        try:
            with open(plist, "rb") as plist_file:
                data = plistlib.load(plist_file)
            # Delete empty reports.
            if not data["files"]:
                os.remove(plist)
                continue

        except InvalidFileException as e:
            # Keep going: one malformed plist should not abort the cleanup.
            stderr(f"Error parsing plist file {plist}: {str(e)}")
            continue
972
973
def clean_up_empty_folders(output_dir: str):
    """
    Remove empty folders from results, as git would not store them.
    """
    for subdir in glob.glob(output_dir + "/*"):
        # removedirs also prunes parent directories that become empty.
        if not os.listdir(subdir):
            os.removedirs(subdir)
982
983
984if __name__ == "__main__":
985    print("SATestBuild.py should not be used on its own.")
986    print("Please use 'SATest.py build' instead")
987    sys.exit(1)
988