xref: /llvm-project/llvm/utils/collect_and_build_with_pgo.py (revision 040f1c78117f9ec90d951f7560198ec9b2fcaf65)
1cf477f4eSGeorge Burgess IV#!/usr/bin/env python3
2cf477f4eSGeorge Burgess IV"""
3cf477f4eSGeorge Burgess IVThis script:
4cf477f4eSGeorge Burgess IV- Builds clang with user-defined flags
5cf477f4eSGeorge Burgess IV- Uses that clang to build an instrumented clang, which can be used to collect
6cf477f4eSGeorge Burgess IV  PGO samples
7cf477f4eSGeorge Burgess IV- Builds a user-defined set of sources (default: clang) to act as a
8cf477f4eSGeorge Burgess IV  "benchmark" to generate a PGO profile
9cf477f4eSGeorge Burgess IV- Builds clang once more with the PGO profile generated above
10cf477f4eSGeorge Burgess IV
11cf477f4eSGeorge Burgess IVThis is a total of four clean builds of clang (by default). This may take a
12cf477f4eSGeorge Burgess IVwhile. :)
13b570f82fSNico Weber
This script duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo
15b570f82fSNico WeberEventually, it will be updated to instead call the cmake cache mentioned there.
16cf477f4eSGeorge Burgess IV"""
17cf477f4eSGeorge Burgess IV
18cf477f4eSGeorge Burgess IVimport argparse
19cf477f4eSGeorge Burgess IVimport collections
20cf477f4eSGeorge Burgess IVimport multiprocessing
21cf477f4eSGeorge Burgess IVimport os
22cf477f4eSGeorge Burgess IVimport shlex
23cf477f4eSGeorge Burgess IVimport shutil
24cf477f4eSGeorge Burgess IVimport subprocess
25cf477f4eSGeorge Burgess IVimport sys
26cf477f4eSGeorge Burgess IV
27cf477f4eSGeorge Burgess IV### User configuration
28cf477f4eSGeorge Burgess IV
29cf477f4eSGeorge Burgess IV
30cf477f4eSGeorge Burgess IV# If you want to use a different 'benchmark' than building clang, make this
31cf477f4eSGeorge Burgess IV# function do what you want. out_dir is the build directory for clang, so all
32cf477f4eSGeorge Burgess IV# of the clang binaries will live under "${out_dir}/bin/". Using clang in
33cf477f4eSGeorge Burgess IV# ${out_dir} will magically have the profiles go to the right place.
34cf477f4eSGeorge Burgess IV#
35cf477f4eSGeorge Burgess IV# You may assume that out_dir is a freshly-built directory that you can reach
36cf477f4eSGeorge Burgess IV# in to build more things, if you'd like.
def _run_benchmark(env, out_dir, include_debug_info):
    """Run the workload whose execution generates the profile data.

    Runs the `check-llvm` and `check-clang` targets inside `out_dir`, then
    configures a fresh bootstrap build (compiled with the clang found in
    `out_dir`) in the "instrumentation_run" output subdirectory and builds
    its `all` target. When `include_debug_info` is true, that build is
    configured as RelWithDebInfo.
    """
    # The test suites are a cheap way to widen coverage: `check-llvm` pokes
    # at the non-x86 backends (if configured) and `check-clang` feeds us more
    # C, including the diagnostic paths. The `if (stuff_is_broken) { diag() }`
    # branches should still be weighted heavily as not-taken, since we also
    # build all of LLVM/etc. below.
    _build_things_in(env, out_dir, what=["check-llvm", "check-clang"])

    # Building tblgen gives us coverage too, so don't reuse prebuilt ones
    # (out_dir may not have them anyway, but that's less of an issue).
    bootstrap = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False
    )
    if include_debug_info:
        bootstrap.add_flag("CMAKE_BUILD_TYPE", "RelWithDebInfo")

    benchmark_dir = env.output_subdir("instrumentation_run")
    _run_fresh_cmake(env, bootstrap, benchmark_dir)

    # More data is better: build everything.
    _build_things_in(env, benchmark_dir, what=["all"])
60b71edfaaSTobias Hieta
61cf477f4eSGeorge Burgess IV
62cf477f4eSGeorge Burgess IV### Script
63cf477f4eSGeorge Burgess IV
64cf477f4eSGeorge Burgess IV
class CmakeInvocation:
    """Builder for a single cmake command line.

    Flags are kept in a dict and rendered as `-DKEY=VALUE` arguments by
    `to_args()`. The compiler- and linker-flag keys listed below are
    pre-registered with list values; anything added to them accumulates and
    is joined with spaces when rendered. Every other key holds one string.
    """

    _cflags = ["CMAKE_C_FLAGS", "CMAKE_CXX_FLAGS"]
    _ldflags = [
        "CMAKE_EXE_LINKER_FLAGS",
        "CMAKE_MODULE_LINKER_FLAGS",
        "CMAKE_SHARED_LINKER_FLAGS",
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation `cmake -G maker cmake_dir` with no flags set."""
        self._prefix = [cmake, "-G", maker, cmake_dir]

        # Map of str -> (list|str). The list-valued entries are created up
        # front so that add_flag() knows to append to them.
        self._flags = {
            name: [] for name in CmakeInvocation._cflags + CmakeInvocation._ldflags
        }

    def add_new_flag(self, key, value):
        """Like add_flag(), but refuse to replace an existing string value."""
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises ValueError if `key` already holds a (non-list) value and
        `allow_overwrites` is false.
        """
        if key in self._flags:
            current = self._flags[key]
            if isinstance(current, list):
                current.append(value)
                return
            if not allow_overwrites:
                raise ValueError("Invalid overwrite of %s requested" % key)

        self._flags[key] = value

    def add_cflags(self, flags):
        """Append each of `flags` to both the C and the C++ compiler flags."""
        # A bare string would be extended character by character
        # (['-', 'O', '2']), which is never what the caller meant.
        assert not isinstance(flags, str)
        for name in CmakeInvocation._cflags:
            self._flags[name].extend(flags)

    def add_ldflags(self, flags):
        """Append each of `flags` to the exe, module and shared linker flags."""
        assert not isinstance(flags, str)
        for name in CmakeInvocation._ldflags:
            self._flags[name].extend(flags)

    def to_args(self):
        """Return the argv list: the prefix followed by -D flags sorted by key."""
        args = list(self._prefix)
        for key in sorted(self._flags):
            value = self._flags[key]
            if isinstance(value, list):
                # The list-y keys always exist; emit nothing for the ones
                # nobody added to.
                if not value:
                    continue
                value = " ".join(value)

            # An empty value renders as a bare `-DKEY`.
            suffix = "" if value == "" else "=" + value
            args.append("-D" + key + suffix)
        return args
125cf477f4eSGeorge Burgess IV
126cf477f4eSGeorge Burgess IV
class Env:
    """Configuration shared by all build steps, plus a command runner.

    Attributes:
        llvm_dir: path of the LLVM source directory handed to cmake.
        use_make: generate/build with Makefiles rather than Ninja.
        output_dir: directory under which build subdirectories are created.
        default_cmake_args: mapping of -D keys to values for every cmake run.
        dry_run: when true, run_command() only prints what it would run.
    """

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args, dry_run):
        self.llvm_dir = llvm_dir
        self.use_make = use_make
        self.output_dir = output_dir
        # Keep our own copy so the caller's mapping is not shared with us.
        self.default_cmake_args = default_cmake_args.copy()
        self.dry_run = dry_run

    def get_default_cmake_args_kv(self):
        """Return the (key, value) pairs of the default cmake arguments."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name to pass to `-G`."""
        if self.use_make:
            return "Unix Makefiles"
        return "Ninja"

    def get_make_command(self):
        """Return the argv prefix of the build tool (targets get appended)."""
        if not self.use_make:
            return ["ninja"]
        return ["make", "-j{}".format(multiprocessing.cpu_count())]

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Return whether the `name` checkout exists as a sibling of llvm_dir.

        Only "compiler-rt" and "clang" are known; anything else raises
        ValueError.
        """
        if name not in ("compiler-rt", "clang"):
            raise ValueError("Unknown subproject: %s" % name)

        return os.path.isdir(os.path.join(self.llvm_dir, "../" + name))

    # Note that we don't allow capturing stdout/stderr. This works quite nicely
    # with dry_run.
    def run_command(self, cmd, cwd=None, check=False, silent_unless_error=False):
        """Announce `cmd`, then run it unless this is a dry run.

        With `silent_unless_error`, the child's stdout and stderr are captured
        together and only printed if it exits non-zero; otherwise they are
        inherited. With `check`, a non-zero exit raises
        subprocess.CalledProcessError. Always returns None.
        """
        print("Running `%s` in %s" % (cmd, shlex.quote(cwd or os.getcwd())))

        if self.dry_run:
            return

        out_target = subprocess.PIPE if silent_unless_error else None
        err_target = subprocess.STDOUT if silent_unless_error else None

        # Don't use subprocess.run because it's >= py3.5 only, and it's not too
        # much extra effort to get what it gives us anyway.
        child = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=out_target,
            stderr=err_target,
            cwd=cwd,
        )
        captured, _ = child.communicate()
        exit_status = child.wait(timeout=0)

        if not exit_status:
            return

        if silent_unless_error:
            print(captured.decode("utf-8", "ignore"))

        if check:
            raise subprocess.CalledProcessError(
                returncode=exit_status, cmd=cmd, output=captured, stderr=None
            )
190cf477f4eSGeorge Burgess IV
191cf477f4eSGeorge Burgess IV
def _get_default_cmake_invocation(env):
    """Return a CmakeInvocation for env.llvm_dir carrying env's default args.

    Each default is added with add_new_flag(), so list-valued keys are
    appended to and nothing is overwritten.
    """
    invocation = CmakeInvocation(
        cmake="cmake", maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir
    )
    defaults = env.get_default_cmake_args_kv()
    for name, setting in defaults:
        invocation.add_new_flag(name, setting)
    return invocation
199cf477f4eSGeorge Burgess IV
200cf477f4eSGeorge Burgess IV
def _get_cmake_invocation_for_bootstrap_from(env, out_dir, skip_tablegens=True):
    """Return a default cmake invocation that compiles with out_dir's clang.

    CMAKE_C_COMPILER / CMAKE_CXX_COMPILER are pointed at `clang` / `clang++`
    in `out_dir`/bin. With `skip_tablegens`, the prebuilt llvm-tblgen and
    clang-tblgen from that directory are reused as well, when available.
    """
    bin_dir = os.path.join(out_dir, "bin")
    clang = os.path.join(bin_dir, "clang")

    invocation = _get_default_cmake_invocation(env)
    invocation.add_new_flag("CMAKE_C_COMPILER", clang)
    invocation.add_new_flag("CMAKE_CXX_COMPILER", clang + "++")

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    if skip_tablegens:
        prebuilt = (
            ("LLVM_TABLEGEN", "llvm-tblgen"),
            ("CLANG_TABLEGEN", "clang-tblgen"),
        )
        for key, binary in prebuilt:
            path = os.path.join(bin_dir, binary)
            # The user may have supplied their own stage1 directory, so only
            # point cmake at a binary that is really there. A dry run can't
            # check, so it hopes for the best.
            if env.dry_run or os.path.exists(path):
                invocation.add_new_flag(key, path)

    return invocation
223cf477f4eSGeorge Burgess IV
224cf477f4eSGeorge Burgess IV
def _build_things_in(env, target_dir, what):
    """Build the targets in the list `what` from within `target_dir`.

    The build tool comes from env.get_make_command(); the command is run with
    check=True, so a failing build propagates from env.run_command().
    """
    env.run_command(env.get_make_command() + what, cwd=target_dir, check=True)
228cf477f4eSGeorge Burgess IV
229cf477f4eSGeorge Burgess IV
def _run_fresh_cmake(env, cmake, target_dir):
    """Run the `cmake` invocation in a newly (re)created `target_dir`.

    Unless this is a dry run, any existing `target_dir` is deleted first and
    an empty one is created in its place. cmake's output is only shown if it
    fails, and a failure propagates from env.run_command().
    """
    wipe_and_recreate = not env.dry_run
    if wipe_and_recreate:
        try:
            shutil.rmtree(target_dir)
        except FileNotFoundError:
            # Nothing left over from an earlier run; that's fine.
            pass

        os.makedirs(target_dir, mode=0o755)

    env.run_command(
        cmake.to_args(), cwd=target_dir, check=True, silent_unless_error=True
    )
241cf477f4eSGeorge Burgess IV
242cf477f4eSGeorge Burgess IV
def _build_stage1_clang(env):
    """Configure and build the stage1 toolchain; return its build directory.

    Uses the default cmake invocation in the "stage1" output subdirectory and
    builds the `clang`, `llvm-profdata` and `profile` targets.
    """
    stage1_dir = env.output_subdir("stage1")
    _run_fresh_cmake(env, _get_default_cmake_invocation(env), stage1_dir)
    _build_things_in(env, stage1_dir, what=["clang", "llvm-profdata", "profile"])
    return stage1_dir
249cf477f4eSGeorge Burgess IV
250cf477f4eSGeorge Burgess IV
def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir, output_file):
    """Merge the raw profiles found in `profile_dir` into `output_file`.

    Runs `llvm-profdata merge` from `stage1_dir`/bin over every `*.profraw`
    entry of `profile_dir`. In a dry run the directory is not read; a literal
    `*.profraw` pattern stands in for the file list in the printed command.

    A failing merge propagates from env.run_command() (check=True).
    """
    llvm_profdata = os.path.join(stage1_dir, "bin", "llvm-profdata")
    if env.dry_run:
        profiles = [os.path.join(profile_dir, "*.profraw")]
    else:
        # os.listdir() returns entries in arbitrary order; sort them so the
        # merge command (and the log line printed for it) is reproducible
        # from run to run.
        profiles = [
            os.path.join(profile_dir, name)
            for name in sorted(os.listdir(profile_dir))
            if name.endswith(".profraw")
        ]
    cmd = [llvm_profdata, "merge", "-output=" + output_file] + profiles
    env.run_command(cmd, check=True)
263cf477f4eSGeorge Burgess IV
264cf477f4eSGeorge Burgess IV
def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang and lld using the stage1 toolchain.

    `stage1_dir` must be an absolute path. Returns a tuple
    (build directory, "profiles" directory inside that build directory).
    """
    assert os.path.isabs(stage1_dir)

    instrumented_dir = os.path.join(env.output_dir, "instrumented")
    invocation = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)

    extra_flags = (
        ("LLVM_BUILD_INSTRUMENTED", "IR"),
        # libcxx's configure step messes with our link order: we'll link
        # libclang_rt.profile after libgcc, and the former requires atexit
        # from the latter. So, configure checks fail.
        #
        # Since we don't need libcxx or compiler-rt anyway, just disable them.
        ("LLVM_BUILD_RUNTIME", "No"),
    )
    for key, value in extra_flags:
        invocation.add_new_flag(key, value)

    _run_fresh_cmake(env, invocation, instrumented_dir)
    _build_things_in(env, instrumented_dir, what=["clang", "lld"])

    return instrumented_dir, os.path.join(instrumented_dir, "profiles")
284cf477f4eSGeorge Burgess IV
285cf477f4eSGeorge Burgess IV
def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build clang with the PGO profile applied; return the build directory.

    The build lives in the "optimized" directory under env.output_dir and is
    compiled with the stage1 toolchain. Raises ValueError when
    `profdata_file` does not exist (not checked during a dry run).
    """
    if not (env.dry_run or os.path.exists(profdata_file)):
        raise ValueError(
            "Looks like the profdata file at %s doesn't exist" % profdata_file
        )

    optimized_dir = os.path.join(env.output_dir, "optimized")
    invocation = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    invocation.add_new_flag("LLVM_PROFDATA_FILE", os.path.abspath(profdata_file))

    # Hash mismatches in `main` in tools/etc. produce complaints; silence
    # them.
    invocation.add_cflags(["-Wno-backend-plugin"])

    _run_fresh_cmake(env, invocation, optimized_dir)
    _build_things_in(env, optimized_dir, what=["clang"])
    return optimized_dir
302cf477f4eSGeorge Burgess IV
303cf477f4eSGeorge Burgess IV
# Options produced by _parse_args() that are not stored on Env.
Args = collections.namedtuple(
    "Args",
    "do_optimized_build include_debug_info profile_location stage1_dir",
)
313cf477f4eSGeorge Burgess IV
314cf477f4eSGeorge Burgess IV
def _parse_args():
    """Parse the command line into an (Env, Args) pair.

    The LLVM directory, the output directory and any user-supplied stage1
    directory are converted to absolute paths, because the build steps run
    commands from other working directories.

    Raises:
        ValueError: if a --cmake-extra-arg value starts with "-" but is not a
            "-D" argument.
    """
    parser = argparse.ArgumentParser(
        description="Builds LLVM and Clang with instrumentation, collects "
        "instrumentation profiles for them, and (optionally) builds things "
        "with these PGO profiles. By default, it's assumed that you're "
        "running this from your LLVM root, and all build artifacts will be "
        "saved to $PWD/out."
    )
    parser.add_argument(
        "--cmake-extra-arg",
        action="append",
        default=[],
        help="an extra arg to pass to all cmake invocations. Note that this "
        "is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will "
        "be passed as -DFOO=BAR. This may be specified multiple times.",
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="print commands instead of running them"
    )
    parser.add_argument(
        "--llvm-dir",
        default=".",
        help="directory containing an LLVM checkout (default: $PWD)",
    )
    parser.add_argument(
        "--no-optimized-build",
        action="store_true",
        help="disable the final, PGO-optimized build",
    )
    parser.add_argument(
        "--out-dir", help="directory to write artifacts to (default: $llvm_dir/out)"
    )
    parser.add_argument(
        "--profile-output",
        help="where to output the profile (default is $out/pgo_profile.prof)",
    )
    parser.add_argument(
        "--stage1-dir",
        help="instead of having an initial build of everything, use the given "
        "directory. It is expected that this directory will have clang, "
        "llvm-profdata, and the appropriate libclang_rt.profile already built",
    )
    parser.add_argument(
        "--use-debug-info-in-benchmark",
        action="store_true",
        # The flag switches the benchmark build to RelWithDebInfo; the help
        # text used to describe the opposite.
        help="use RelWithDebInfo instead of a regular build in the benchmark. "
        "This increases benchmark execution time and disk space requirements, "
        "but gives more coverage over debuginfo bits in LLVM and clang.",
    )
    parser.add_argument(
        "--use-make",
        action="store_true",
        # Fall back to make automatically when ninja is not on PATH.
        default=shutil.which("ninja") is None,
        help="use Makefiles instead of ninja",
    )

    args = parser.parse_args()

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, "out")
    else:
        output_dir = os.path.abspath(args.out_dir)

    # Defaults; user-supplied --cmake-extra-arg values may override them.
    extra_args = {
        "CMAKE_BUILD_TYPE": "Release",
        "LLVM_ENABLE_PROJECTS": "clang;compiler-rt;lld",
    }
    for arg in args.cmake_extra_arg:
        if arg.startswith("-D"):
            arg = arg[2:]
        elif arg.startswith("-"):
            raise ValueError(
                "Unknown not- -D arg encountered; you may need "
                "to tweak the source..."
            )
        # "KEY" alone yields an empty value, which is rendered as "-DKEY".
        key, _, val = arg.partition("=")
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, "pgo_profile.prof")

    # The later stages run cmake from inside their own build directories and
    # _build_instrumented_clang() asserts that this path is absolute, so
    # resolve a user-supplied relative path against the current directory.
    stage1_dir = args.stage1_dir
    if stage1_dir is not None:
        stage1_dir = os.path.abspath(stage1_dir)

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=stage1_dir,
    )

    return env, result_args
419cf477f4eSGeorge Burgess IV
420cf477f4eSGeorge Burgess IV
def _looks_like_llvm_dir(directory):
    """Heuristically decide whether `directory` is an LLVM source directory.

    It must contain a handful of well-known entries, and its `include`
    subdirectory must contain `llvm`. Errs on the side of false-positives.
    """
    required = {"Maintainers.md", "cmake", "docs", "include", "utils"}
    if not required.issubset(os.listdir(directory)):
        return False

    try:
        return "llvm" in os.listdir(os.path.join(directory, "include"))
    except NotADirectoryError:
        # `include` exists but is not a directory.
        return False
444cf477f4eSGeorge Burgess IV
445cf477f4eSGeorge Burgess IV
def _die(*args, **kwargs):
    """Print a message to stderr and exit with status 1.

    Accepts the same arguments as print(); any `file` keyword passed in is
    replaced by sys.stderr.
    """
    print(*args, **dict(kwargs, file=sys.stderr))
    sys.exit(1)
450cf477f4eSGeorge Burgess IV
451cf477f4eSGeorge Burgess IV
def _main():
    """Run the whole pipeline: stage1, instrumented build, benchmark, merge,
    and (unless disabled) the final PGO-optimized build.

    Exits with status 1 if the LLVM directory or its required sibling
    checkouts are missing. Failing build commands propagate as exceptions
    from the individual steps.
    """
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die("Looks like %s isn't an LLVM directory; please see --help" % env.llvm_dir)
    # has_llvm_subproject() looks for these as siblings of the LLVM
    # directory, so say so in the messages (they used to name the old
    # tools/clang and projects/compiler-rt locations).
    if not env.has_llvm_subproject("clang"):
        _die("Need a clang checkout at ../clang (relative to the LLVM directory)")
    if not env.has_llvm_subproject("compiler-rt"):
        _die(
            "Need a compiler-rt checkout at ../compiler-rt "
            "(relative to the LLVM directory)"
        )

    def status(*message):
        # Progress goes to stderr; stdout is kept for the final results.
        print(*message, file=sys.stderr)

    if args.stage1_dir is None:
        status("*** Building stage1 clang...")
        stage1_out = _build_stage1_clang(env)
    else:
        # The user already has a suitable toolchain; skip the first build.
        stage1_out = args.stage1_dir

    status("*** Building instrumented clang...")
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status("*** Running profdata benchmarks...")
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status("*** Generating profile...")
    _generate_instrumented_clang_profile(
        env, stage1_out, profile_dir, args.profile_location
    )

    print("Final profile:", args.profile_location)
    if args.do_optimized_build:
        status("*** Building PGO-optimized binaries...")
        optimized_out = _build_optimized_clang(env, stage1_out, args.profile_location)
        print("Final build directory:", optimized_out)
485cf477f4eSGeorge Burgess IV
486cf477f4eSGeorge Burgess IV
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()
489