xref: /llvm-project/llvm/utils/check_ninja_deps.py (revision b71edfaa4ec3c998aadb35255ce2f60bba2940b0)
1#!/usr/bin/env python3
2#
3# ======- check-ninja-deps - build debugging script ----*- python -*--========#
4#
5# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6# See https://llvm.org/LICENSE.txt for license information.
7# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8#
9# ==------------------------------------------------------------------------==#
10
11"""Script to find missing formal dependencies in a build.ninja file.
12
13Suppose you have a header file that's autogenerated by (for example) Tablegen.
14If a C++ compilation step needs to include that header, then it must be
15executed after the Tablegen build step that generates the header. So the
16dependency graph in build.ninja should have the Tablegen build step as an
17ancestor of the C++ one. If it does not, then there's a latent build-failure
18bug, because depending on the order that ninja chooses to schedule its build
19steps, the C++ build step could run first, and fail because the header it needs
20does not exist yet.
21
22But because that kind of bug can easily be latent or intermittent, you might
23not notice, if your local test build happens to succeed. What you'd like is a
24way to detect problems of this kind reliably, even if they _didn't_ cause a
25failure on your first test.
26
27This script tries to do that. It's specific to the 'ninja' build tool, because
28ninja has useful auxiliary output modes that produce the necessary data:
29
30 - 'ninja -t graph' emits the full DAG of formal dependencies derived from
31   build.ninja (in Graphviz format)
32
33 - 'ninja -t deps' dumps the database of dependencies discovered at build time
34   by finding out which headers each source file actually included
35
36By cross-checking these two sources of data against each other, you can find
37true dependencies shown by 'deps' that are not reflected as formal dependencies
38in 'graph', i.e. a generated header that is required by a given source file but
39not forced to be built first.
40
41To run it:
42
43 - set up a build directory using ninja as the build tool (cmake -G Ninja)
44
45 - in that build directory, run ninja to perform an actual build (populating
46   the dependency database)
47
48 - then, in the same build directory, run this script. No arguments are needed
49   (but -C and -f are accepted, and propagated to ninja for convenience).
50
51Requirements outside core Python: the 'pygraphviz' module, available via pip or
52as the 'python3-pygraphviz' package in Debian and Ubuntu.
53
54"""
55
56import sys
57import argparse
58import subprocess
59import pygraphviz
60
61
62def toposort(g):
63    """Topologically sort a graph.
64
65    The input g is a pygraphviz graph object representing a DAG. The function
66    yields the vertices of g in an arbitrary order consistent with the edges,
67    so that for any edge v->w, v is output before w."""
68
69    # Count the number of immediate predecessors *not yet output* for each
70    # vertex. Initially this is simply their in-degrees.
71    ideg = {v: g.in_degree(v) for v in g.nodes_iter()}
72
73    # Set of vertices which can be output next, which is true if they have no
74    # immediate predecessor that has not already been output.
75    ready = {v for v, d in ideg.items() if d == 0}
76
77    # Keep outputting vertices while we have any to output.
78    while len(ready) > 0:
79        v = next(iter(ready))
80        yield v
81        ready.remove(v)
82
83        # Having output v, find each immediate successor w, and decrement its
84        # 'ideg' value by 1, to indicate that one more of its predecessors has
85        # now been output.
86        for w in g.out_neighbors(v):
87            ideg[w] -= 1
88            if ideg[w] == 0:
89                # If that counter reaches zero, w is ready to output.
90                ready.add(w)
91
92
93def ancestors(g, translate=lambda x: x):
94    """Form the set of ancestors for each vertex of a graph.
95
96    The input g is a pygraphviz graph object representing a DAG. The function
97    yields a sequence of pairs (vertex, set of proper ancestors).
98
99    The vertex names are all mapped through 'translate' before output. This
100    allows us to produce output referring to the label rather than the
101    identifier of every vertex.
102    """
103
104    # Store the set of (translated) ancestors for each vertex so far. a[v]
105    # includes (the translation of) v itself.
106    a = {}
107
108    for v in toposort(g):
109        vm = translate(v)
110
111        # Make up a[v], based on a[predecessors of v].
112        a[v] = {vm}  # include v itself
113        for w in g.in_neighbors(v):
114            a[v].update(a[w])
115
116        # Remove v itself from the set before yielding it, so that the caller
117        # doesn't get the trivial dependency of v on itself.
118        yield vm, a[v].difference({vm})
119
120
121def main():
122    parser = argparse.ArgumentParser(
123        description="Find missing formal dependencies on generated include "
124        "files in a build.ninja file."
125    )
126    parser.add_argument("-C", "--build-dir", help="Build directory (default cwd)")
127    parser.add_argument(
128        "-f", "--build-file", help="Build directory (default build.ninja)"
129    )
130    args = parser.parse_args()
131
132    errs = 0
133
134    ninja_prefix = ["ninja"]
135    if args.build_dir is not None:
136        ninja_prefix.extend(["-C", args.build_dir])
137    if args.build_file is not None:
138        ninja_prefix.extend(["-f", args.build_file])
139
140    # Get the formal dependency graph and decode it using pygraphviz.
141    g = pygraphviz.AGraph(
142        subprocess.check_output(ninja_prefix + ["-t", "graph"]).decode("UTF-8")
143    )
144
145    # Helper function to ask for the label of a vertex, which is where ninja's
146    # Graphviz output keeps the actual file name of the target.
147    label = lambda v: g.get_node(v).attr["label"]
148
149    # Start by making a list of build targets, i.e. generated files. These are
150    # just any graph vertex with at least one predecessor.
151    targets = set(label(v) for v in g.nodes_iter() if g.in_degree(v) > 0)
152
153    # Find the set of ancestors of each graph vertex. We pass in 'label' as a
154    # translation function, so that this gives us the set of ancestor _files_
155    # for a given _file_ rather than arbitrary numeric vertex ids.
156    deps = dict(ancestors(g, label))
157
158    # Fetch the cached dependency data and check it against our formal ancestry
159    # data.
160    currtarget = None
161    for line in (
162        subprocess.check_output(ninja_prefix + ["-t", "deps"])
163        .decode("UTF-8")
164        .splitlines()
165    ):
166        # ninja -t deps output consists of stanzas of the following form,
167        # separated by a blank line:
168        #
169        # target: [other information we don't need]
170        #     some_file.cpp
171        #     some_header.h
172        #     other_header.h
173        #
174        # We parse this ad-hoc by detecting the four leading spaces in a
175        # source-file line, and the colon in a target line. 'currtarget' stores
176        # the last target name we saw.
177        if line.startswith("    "):
178            dep = line[4:]
179            assert currtarget is not None, "Source file appeared before target"
180
181            # We're only interested in this dependency if it's a *generated*
182            # file, i.e. it is in our set of targets. Also, we must check that
183            # currtarget is actually a target we know about: the dependency
184            # cache is not cleared when build.ninja changes, so it can contain
185            # stale data from targets that existed only in past builds in the
186            # same directory.
187            if dep in targets and currtarget in deps and dep not in deps[currtarget]:
188                print(
189                    "error:",
190                    currtarget,
191                    "requires",
192                    dep,
193                    "but has no dependency on it",
194                    file=sys.stderr,
195                )
196                errs += 1
197        elif ":" in line:
198            currtarget = line.split(":", 1)[0]
199
200    if errs:
201        sys.exit("{:d} errors found".format(errs))
202
203
204if __name__ == "__main__":
205    main()
206