xref: /llvm-project/llvm/utils/check_ninja_deps.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
186d362f4SSimon Tatham#!/usr/bin/env python3
286d362f4SSimon Tatham#
386d362f4SSimon Tatham# ======- check-ninja-deps - build debugging script ----*- python -*--========#
486d362f4SSimon Tatham#
586d362f4SSimon Tatham# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
686d362f4SSimon Tatham# See https://llvm.org/LICENSE.txt for license information.
786d362f4SSimon Tatham# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
886d362f4SSimon Tatham#
986d362f4SSimon Tatham# ==------------------------------------------------------------------------==#
1086d362f4SSimon Tatham
1186d362f4SSimon Tatham"""Script to find missing formal dependencies in a build.ninja file.
1286d362f4SSimon Tatham
1386d362f4SSimon TathamSuppose you have a header file that's autogenerated by (for example) Tablegen.
1486d362f4SSimon TathamIf a C++ compilation step needs to include that header, then it must be
1586d362f4SSimon Tathamexecuted after the Tablegen build step that generates the header. So the
1686d362f4SSimon Tathamdependency graph in build.ninja should have the Tablegen build step as an
1786d362f4SSimon Tathamancestor of the C++ one. If it does not, then there's a latent build-failure
1886d362f4SSimon Tathambug, because depending on the order that ninja chooses to schedule its build
1986d362f4SSimon Tathamsteps, the C++ build step could run first, and fail because the header it needs
2086d362f4SSimon Tathamdoes not exist yet.
2186d362f4SSimon Tatham
2286d362f4SSimon TathamBut because that kind of bug can easily be latent or intermittent, you might
2386d362f4SSimon Tathamnot notice, if your local test build happens to succeed. What you'd like is a
2486d362f4SSimon Tathamway to detect problems of this kind reliably, even if they _didn't_ cause a
2586d362f4SSimon Tathamfailure on your first test.
2686d362f4SSimon Tatham
2786d362f4SSimon TathamThis script tries to do that. It's specific to the 'ninja' build tool, because
2886d362f4SSimon Tathamninja has useful auxiliary output modes that produce the necessary data:
2986d362f4SSimon Tatham
3086d362f4SSimon Tatham - 'ninja -t graph' emits the full DAG of formal dependencies derived from
3186d362f4SSimon Tatham   build.ninja (in Graphviz format)
3286d362f4SSimon Tatham
3386d362f4SSimon Tatham - 'ninja -t deps' dumps the database of dependencies discovered at build time
3486d362f4SSimon Tatham   by finding out which headers each source file actually included
3586d362f4SSimon Tatham
3686d362f4SSimon TathamBy cross-checking these two sources of data against each other, you can find
3786d362f4SSimon Tathamtrue dependencies shown by 'deps' that are not reflected as formal dependencies
3886d362f4SSimon Tathamin 'graph', i.e. a generated header that is required by a given source file but
3986d362f4SSimon Tathamnot forced to be built first.
4086d362f4SSimon Tatham
4186d362f4SSimon TathamTo run it:
4286d362f4SSimon Tatham
4386d362f4SSimon Tatham - set up a build directory using ninja as the build tool (cmake -G Ninja)
4486d362f4SSimon Tatham
4586d362f4SSimon Tatham - in that build directory, run ninja to perform an actual build (populating
4686d362f4SSimon Tatham   the dependency database)
4786d362f4SSimon Tatham
4886d362f4SSimon Tatham - then, in the same build directory, run this script. No arguments are needed
4986d362f4SSimon Tatham   (but -C and -f are accepted, and propagated to ninja for convenience).
5086d362f4SSimon Tatham
5186d362f4SSimon TathamRequirements outside core Python: the 'pygraphviz' module, available via pip or
5286d362f4SSimon Tathamas the 'python3-pygraphviz' package in Debian and Ubuntu.
5386d362f4SSimon Tatham
5486d362f4SSimon Tatham"""
5586d362f4SSimon Tatham
5686d362f4SSimon Tathamimport sys
5786d362f4SSimon Tathamimport argparse
5886d362f4SSimon Tathamimport subprocess
5986d362f4SSimon Tathamimport pygraphviz
6086d362f4SSimon Tatham
61*b71edfaaSTobias Hieta
6286d362f4SSimon Tathamdef toposort(g):
6386d362f4SSimon Tatham    """Topologically sort a graph.
6486d362f4SSimon Tatham
6586d362f4SSimon Tatham    The input g is a pygraphviz graph object representing a DAG. The function
6686d362f4SSimon Tatham    yields the vertices of g in an arbitrary order consistent with the edges,
6786d362f4SSimon Tatham    so that for any edge v->w, v is output before w."""
6886d362f4SSimon Tatham
6986d362f4SSimon Tatham    # Count the number of immediate predecessors *not yet output* for each
7086d362f4SSimon Tatham    # vertex. Initially this is simply their in-degrees.
7186d362f4SSimon Tatham    ideg = {v: g.in_degree(v) for v in g.nodes_iter()}
7286d362f4SSimon Tatham
7386d362f4SSimon Tatham    # Set of vertices which can be output next, which is true if they have no
7486d362f4SSimon Tatham    # immediate predecessor that has not already been output.
7586d362f4SSimon Tatham    ready = {v for v, d in ideg.items() if d == 0}
7686d362f4SSimon Tatham
7786d362f4SSimon Tatham    # Keep outputting vertices while we have any to output.
7886d362f4SSimon Tatham    while len(ready) > 0:
7986d362f4SSimon Tatham        v = next(iter(ready))
8086d362f4SSimon Tatham        yield v
8186d362f4SSimon Tatham        ready.remove(v)
8286d362f4SSimon Tatham
8386d362f4SSimon Tatham        # Having output v, find each immediate successor w, and decrement its
8486d362f4SSimon Tatham        # 'ideg' value by 1, to indicate that one more of its predecessors has
8586d362f4SSimon Tatham        # now been output.
8686d362f4SSimon Tatham        for w in g.out_neighbors(v):
8786d362f4SSimon Tatham            ideg[w] -= 1
8886d362f4SSimon Tatham            if ideg[w] == 0:
8986d362f4SSimon Tatham                # If that counter reaches zero, w is ready to output.
9086d362f4SSimon Tatham                ready.add(w)
9186d362f4SSimon Tatham
92*b71edfaaSTobias Hieta
9386d362f4SSimon Tathamdef ancestors(g, translate=lambda x: x):
9486d362f4SSimon Tatham    """Form the set of ancestors for each vertex of a graph.
9586d362f4SSimon Tatham
9686d362f4SSimon Tatham    The input g is a pygraphviz graph object representing a DAG. The function
9786d362f4SSimon Tatham    yields a sequence of pairs (vertex, set of proper ancestors).
9886d362f4SSimon Tatham
9986d362f4SSimon Tatham    The vertex names are all mapped through 'translate' before output. This
10086d362f4SSimon Tatham    allows us to produce output referring to the label rather than the
10186d362f4SSimon Tatham    identifier of every vertex.
10286d362f4SSimon Tatham    """
10386d362f4SSimon Tatham
10486d362f4SSimon Tatham    # Store the set of (translated) ancestors for each vertex so far. a[v]
10586d362f4SSimon Tatham    # includes (the translation of) v itself.
10686d362f4SSimon Tatham    a = {}
10786d362f4SSimon Tatham
10886d362f4SSimon Tatham    for v in toposort(g):
10986d362f4SSimon Tatham        vm = translate(v)
11086d362f4SSimon Tatham
11186d362f4SSimon Tatham        # Make up a[v], based on a[predecessors of v].
11286d362f4SSimon Tatham        a[v] = {vm}  # include v itself
11386d362f4SSimon Tatham        for w in g.in_neighbors(v):
11486d362f4SSimon Tatham            a[v].update(a[w])
11586d362f4SSimon Tatham
11686d362f4SSimon Tatham        # Remove v itself from the set before yielding it, so that the caller
11786d362f4SSimon Tatham        # doesn't get the trivial dependency of v on itself.
11886d362f4SSimon Tatham        yield vm, a[v].difference({vm})
11986d362f4SSimon Tatham
120*b71edfaaSTobias Hieta
12186d362f4SSimon Tathamdef main():
12286d362f4SSimon Tatham    parser = argparse.ArgumentParser(
123*b71edfaaSTobias Hieta        description="Find missing formal dependencies on generated include "
124*b71edfaaSTobias Hieta        "files in a build.ninja file."
125*b71edfaaSTobias Hieta    )
126*b71edfaaSTobias Hieta    parser.add_argument("-C", "--build-dir", help="Build directory (default cwd)")
127*b71edfaaSTobias Hieta    parser.add_argument(
128*b71edfaaSTobias Hieta        "-f", "--build-file", help="Build directory (default build.ninja)"
129*b71edfaaSTobias Hieta    )
13086d362f4SSimon Tatham    args = parser.parse_args()
13186d362f4SSimon Tatham
13286d362f4SSimon Tatham    errs = 0
13386d362f4SSimon Tatham
13486d362f4SSimon Tatham    ninja_prefix = ["ninja"]
13586d362f4SSimon Tatham    if args.build_dir is not None:
13686d362f4SSimon Tatham        ninja_prefix.extend(["-C", args.build_dir])
13786d362f4SSimon Tatham    if args.build_file is not None:
13886d362f4SSimon Tatham        ninja_prefix.extend(["-f", args.build_file])
13986d362f4SSimon Tatham
14086d362f4SSimon Tatham    # Get the formal dependency graph and decode it using pygraphviz.
141*b71edfaaSTobias Hieta    g = pygraphviz.AGraph(
142*b71edfaaSTobias Hieta        subprocess.check_output(ninja_prefix + ["-t", "graph"]).decode("UTF-8")
143*b71edfaaSTobias Hieta    )
14486d362f4SSimon Tatham
14586d362f4SSimon Tatham    # Helper function to ask for the label of a vertex, which is where ninja's
14686d362f4SSimon Tatham    # Graphviz output keeps the actual file name of the target.
14786d362f4SSimon Tatham    label = lambda v: g.get_node(v).attr["label"]
14886d362f4SSimon Tatham
14986d362f4SSimon Tatham    # Start by making a list of build targets, i.e. generated files. These are
15086d362f4SSimon Tatham    # just any graph vertex with at least one predecessor.
15186d362f4SSimon Tatham    targets = set(label(v) for v in g.nodes_iter() if g.in_degree(v) > 0)
15286d362f4SSimon Tatham
15386d362f4SSimon Tatham    # Find the set of ancestors of each graph vertex. We pass in 'label' as a
15486d362f4SSimon Tatham    # translation function, so that this gives us the set of ancestor _files_
15586d362f4SSimon Tatham    # for a given _file_ rather than arbitrary numeric vertex ids.
15686d362f4SSimon Tatham    deps = dict(ancestors(g, label))
15786d362f4SSimon Tatham
15886d362f4SSimon Tatham    # Fetch the cached dependency data and check it against our formal ancestry
15986d362f4SSimon Tatham    # data.
16086d362f4SSimon Tatham    currtarget = None
161*b71edfaaSTobias Hieta    for line in (
162*b71edfaaSTobias Hieta        subprocess.check_output(ninja_prefix + ["-t", "deps"])
163*b71edfaaSTobias Hieta        .decode("UTF-8")
164*b71edfaaSTobias Hieta        .splitlines()
165*b71edfaaSTobias Hieta    ):
16686d362f4SSimon Tatham        # ninja -t deps output consists of stanzas of the following form,
16786d362f4SSimon Tatham        # separated by a blank line:
16886d362f4SSimon Tatham        #
16986d362f4SSimon Tatham        # target: [other information we don't need]
17086d362f4SSimon Tatham        #     some_file.cpp
17186d362f4SSimon Tatham        #     some_header.h
17286d362f4SSimon Tatham        #     other_header.h
17386d362f4SSimon Tatham        #
17486d362f4SSimon Tatham        # We parse this ad-hoc by detecting the four leading spaces in a
17586d362f4SSimon Tatham        # source-file line, and the colon in a target line. 'currtarget' stores
17686d362f4SSimon Tatham        # the last target name we saw.
17786d362f4SSimon Tatham        if line.startswith("    "):
17886d362f4SSimon Tatham            dep = line[4:]
17986d362f4SSimon Tatham            assert currtarget is not None, "Source file appeared before target"
18086d362f4SSimon Tatham
18186d362f4SSimon Tatham            # We're only interested in this dependency if it's a *generated*
18286d362f4SSimon Tatham            # file, i.e. it is in our set of targets. Also, we must check that
18386d362f4SSimon Tatham            # currtarget is actually a target we know about: the dependency
18486d362f4SSimon Tatham            # cache is not cleared when build.ninja changes, so it can contain
18586d362f4SSimon Tatham            # stale data from targets that existed only in past builds in the
18686d362f4SSimon Tatham            # same directory.
187*b71edfaaSTobias Hieta            if dep in targets and currtarget in deps and dep not in deps[currtarget]:
188*b71edfaaSTobias Hieta                print(
189*b71edfaaSTobias Hieta                    "error:",
190*b71edfaaSTobias Hieta                    currtarget,
191*b71edfaaSTobias Hieta                    "requires",
192*b71edfaaSTobias Hieta                    dep,
193*b71edfaaSTobias Hieta                    "but has no dependency on it",
194*b71edfaaSTobias Hieta                    file=sys.stderr,
195*b71edfaaSTobias Hieta                )
19686d362f4SSimon Tatham                errs += 1
19786d362f4SSimon Tatham        elif ":" in line:
19886d362f4SSimon Tatham            currtarget = line.split(":", 1)[0]
19986d362f4SSimon Tatham
20086d362f4SSimon Tatham    if errs:
20186d362f4SSimon Tatham        sys.exit("{:d} errors found".format(errs))
20286d362f4SSimon Tatham
203*b71edfaaSTobias Hieta
204*b71edfaaSTobias Hietaif __name__ == "__main__":
20586d362f4SSimon Tatham    main()
206