xref: /llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp (revision 564fd62aedfde6358baa1776a2de975b45bc7778)
1a443b3d1SSam McCall //===-- HTMLLogger.cpp ----------------------------------------------------===//
2a443b3d1SSam McCall //
3a443b3d1SSam McCall // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a443b3d1SSam McCall // See https://llvm.org/LICENSE.txt for license information.
5a443b3d1SSam McCall // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a443b3d1SSam McCall //
7a443b3d1SSam McCall //===----------------------------------------------------------------------===//
8a443b3d1SSam McCall //
9a443b3d1SSam McCall // This file implements the HTML logger. Given a directory dir/, we write
10a443b3d1SSam McCall // dir/0.html for the first analysis, etc.
11a443b3d1SSam McCall // These files contain a visualization that allows inspecting the CFG and the
12a443b3d1SSam McCall // state of the analysis at each point.
13a443b3d1SSam McCall // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14a443b3d1SSam McCall // so each output file is self-contained.
15a443b3d1SSam McCall //
16a443b3d1SSam McCall // VIEWS
17a443b3d1SSam McCall //
18a443b3d1SSam McCall // The timeline and function view are always shown. These allow selecting basic
19a443b3d1SSam McCall // blocks, statements within them, and processing iterations (BBs are visited
20a443b3d1SSam McCall // multiple times when e.g. loops are involved).
21a443b3d1SSam McCall // These are written directly into the HTML body.
22a443b3d1SSam McCall //
23a443b3d1SSam McCall // There are also listings of particular basic blocks, and dumps of the state
24a443b3d1SSam McCall // at particular analysis points (i.e. BB2 iteration 3 statement 2).
25a443b3d1SSam McCall // These are only shown when the relevant BB/analysis point is *selected*.
26a443b3d1SSam McCall //
27a443b3d1SSam McCall // DATA AND TEMPLATES
28a443b3d1SSam McCall //
29a443b3d1SSam McCall // The HTML proper is mostly static.
30a443b3d1SSam McCall // The analysis data is in a JSON object HTMLLoggerData which is embedded as
31a443b3d1SSam McCall // a <script> in the <head>.
32a443b3d1SSam McCall // This gets rendered into DOM by a simple template processor which substitutes
33a443b3d1SSam McCall // the data into <template> tags embedded in the HTML. (see inflate() in JS).
34a443b3d1SSam McCall //
35a443b3d1SSam McCall // SELECTION
36a443b3d1SSam McCall //
37a443b3d1SSam McCall // This is the only real interactive mechanism.
38a443b3d1SSam McCall //
39a443b3d1SSam McCall // At any given time, there are several named selections, e.g.:
//   bb: B2               (basic block 2 is selected)
41a443b3d1SSam McCall //   elt: B2.4            (statement 4 is selected)
42a443b3d1SSam McCall //   iter: B2:1           (iteration 1 of the basic block is selected)
43a443b3d1SSam McCall //   hover: B3            (hovering over basic block 3)
44a443b3d1SSam McCall //
45a443b3d1SSam McCall // The selection is updated by mouse events: hover by moving the mouse and
46a443b3d1SSam McCall // others by clicking. Elements that are click targets generally have attributes
47a443b3d1SSam McCall // (id or data-foo) that define what they should select.
48a443b3d1SSam McCall // See watchSelection() in JS for the exact logic.
49a443b3d1SSam McCall //
50a443b3d1SSam McCall // When the "bb" selection is set to "B2":
51a443b3d1SSam McCall //   - sections <section data-selection="bb"> get shown
52a443b3d1SSam McCall //   - templates under such sections get re-rendered
53a443b3d1SSam McCall //   - elements with class/id "B2" get class "bb-select"
54a443b3d1SSam McCall //
55a443b3d1SSam McCall //===----------------------------------------------------------------------===//
56a443b3d1SSam McCall 
5759ff3adcSmartinboehme #include "clang/Analysis/FlowSensitive/AdornedCFG.h"
58a443b3d1SSam McCall #include "clang/Analysis/FlowSensitive/DebugSupport.h"
59a443b3d1SSam McCall #include "clang/Analysis/FlowSensitive/Logger.h"
60a443b3d1SSam McCall #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61a443b3d1SSam McCall #include "clang/Analysis/FlowSensitive/Value.h"
62a443b3d1SSam McCall #include "clang/Basic/SourceManager.h"
63a443b3d1SSam McCall #include "clang/Lex/Lexer.h"
64a443b3d1SSam McCall #include "llvm/ADT/DenseMap.h"
65a443b3d1SSam McCall #include "llvm/ADT/ScopeExit.h"
66a443b3d1SSam McCall #include "llvm/Support/Error.h"
67a443b3d1SSam McCall #include "llvm/Support/FormatVariadic.h"
68a443b3d1SSam McCall #include "llvm/Support/JSON.h"
69a443b3d1SSam McCall #include "llvm/Support/Program.h"
70b56b15edSSam McCall #include "llvm/Support/ScopedPrinter.h"
71a443b3d1SSam McCall #include "llvm/Support/raw_ostream.h"
// Defines assets: HTMLLogger_{html,js,css}
73a443b3d1SSam McCall #include "HTMLLogger.inc"
74a443b3d1SSam McCall 
75a443b3d1SSam McCall namespace clang::dataflow {
76a443b3d1SSam McCall namespace {
77a443b3d1SSam McCall 
78a443b3d1SSam McCall // Render a graphviz graph specification to SVG using the `dot` tool.
79a443b3d1SSam McCall llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
80a443b3d1SSam McCall 
81a443b3d1SSam McCall using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
82a443b3d1SSam McCall 
83b56b15edSSam McCall // Recursively dumps Values/StorageLocations as JSON
84b56b15edSSam McCall class ModelDumper {
85b56b15edSSam McCall public:
86b56b15edSSam McCall   ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87b56b15edSSam McCall       : JOS(JOS), Env(Env) {}
88b56b15edSSam McCall 
89b56b15edSSam McCall   void dump(Value &V) {
90b56b15edSSam McCall     JOS.attribute("value_id", llvm::to_string(&V));
91b56b15edSSam McCall     if (!Visited.insert(&V).second)
92b56b15edSSam McCall       return;
93b56b15edSSam McCall 
94b56b15edSSam McCall     JOS.attribute("kind", debugString(V.getKind()));
95b56b15edSSam McCall 
96b56b15edSSam McCall     switch (V.getKind()) {
97b56b15edSSam McCall     case Value::Kind::Integer:
98b56b15edSSam McCall     case Value::Kind::TopBool:
99b56b15edSSam McCall     case Value::Kind::AtomicBool:
100fc9821a8SSam McCall     case Value::Kind::FormulaBool:
101b56b15edSSam McCall       break;
102b56b15edSSam McCall     case Value::Kind::Pointer:
103b56b15edSSam McCall       JOS.attributeObject(
104b56b15edSSam McCall           "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
105b56b15edSSam McCall       break;
106b56b15edSSam McCall     }
107b56b15edSSam McCall 
108b56b15edSSam McCall     for (const auto& Prop : V.properties())
109b56b15edSSam McCall       JOS.attributeObject(("p:" + Prop.first()).str(),
110b56b15edSSam McCall                           [&] { dump(*Prop.second); });
111b56b15edSSam McCall 
112b56b15edSSam McCall     // Running the SAT solver is expensive, but knowing which booleans are
113b56b15edSSam McCall     // guaranteed true/false here is valuable and hard to determine by hand.
114b56b15edSSam McCall     if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
115fc9821a8SSam McCall       JOS.attribute("formula", llvm::to_string(B->formula()));
116526c9b7eSmartinboehme       JOS.attribute("truth", Env.proves(B->formula()) ? "true"
117526c9b7eSmartinboehme                              : Env.proves(Env.arena().makeNot(B->formula()))
118b56b15edSSam McCall                                  ? "false"
119b56b15edSSam McCall                                  : "unknown");
120b56b15edSSam McCall     }
121b56b15edSSam McCall   }
122b56b15edSSam McCall   void dump(const StorageLocation &L) {
123b56b15edSSam McCall     JOS.attribute("location", llvm::to_string(&L));
124b56b15edSSam McCall     if (!Visited.insert(&L).second)
125b56b15edSSam McCall       return;
126b56b15edSSam McCall 
127b56b15edSSam McCall     JOS.attribute("type", L.getType().getAsString());
128e8fce958Smartinboehme     if (!L.getType()->isRecordType())
129b56b15edSSam McCall       if (auto *V = Env.getValue(L))
130b56b15edSSam McCall         dump(*V);
131e791535bSKinuko Yasuda 
132e791535bSKinuko Yasuda     if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) {
133e791535bSKinuko Yasuda       for (const auto &Child : RLoc->children())
134e791535bSKinuko Yasuda         JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
135e791535bSKinuko Yasuda           if (Child.second)
136*564fd62aSFlorian Mayer             dump(*Child.second);
137e791535bSKinuko Yasuda         });
13871f2ec2dSmartinboehme 
13971f2ec2dSmartinboehme       for (const auto &SyntheticField : RLoc->synthetic_fields())
14071f2ec2dSmartinboehme         JOS.attributeObject(("sf:" + SyntheticField.first()).str(),
14171f2ec2dSmartinboehme                             [&] { dump(*SyntheticField.second); });
142e791535bSKinuko Yasuda     }
143b56b15edSSam McCall   }
144b56b15edSSam McCall 
145b56b15edSSam McCall   llvm::DenseSet<const void*> Visited;
146b56b15edSSam McCall   llvm::json::OStream &JOS;
147b56b15edSSam McCall   const Environment &Env;
148b56b15edSSam McCall };
149b56b15edSSam McCall 
150a443b3d1SSam McCall class HTMLLogger : public Logger {
151ed65ced2Smartinboehme   struct Iteration {
152ed65ced2Smartinboehme     const CFGBlock *Block;
153ed65ced2Smartinboehme     unsigned Iter;
154ed65ced2Smartinboehme     bool PostVisit;
1552be7c651Smartinboehme     bool Converged;
156ed65ced2Smartinboehme   };
157ed65ced2Smartinboehme 
158a443b3d1SSam McCall   StreamFactory Streams;
159a443b3d1SSam McCall   std::unique_ptr<llvm::raw_ostream> OS;
16082324bc9Smartinboehme   std::string JSON;
16182324bc9Smartinboehme   llvm::raw_string_ostream JStringStream{JSON};
16282324bc9Smartinboehme   llvm::json::OStream JOS{JStringStream, /*Indent=*/2};
163a443b3d1SSam McCall 
16459ff3adcSmartinboehme   const AdornedCFG *ACFG;
165a443b3d1SSam McCall   // Timeline of iterations of CFG block visitation.
166ed65ced2Smartinboehme   std::vector<Iteration> Iters;
  // Indexes in `Iters` of the iterations for each block.
1682be7c651Smartinboehme   llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters;
16982324bc9Smartinboehme   // For a given block ID, did the block converge (on the last iteration)?
17082324bc9Smartinboehme   llvm::BitVector BlockConverged;
171a443b3d1SSam McCall   // The messages logged in the current context but not yet written.
172a443b3d1SSam McCall   std::string ContextLogs;
173a443b3d1SSam McCall   // The number of elements we have visited within the current CFG block.
174a443b3d1SSam McCall   unsigned ElementIndex;
175a443b3d1SSam McCall 
176a443b3d1SSam McCall public:
177a443b3d1SSam McCall   explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
17859ff3adcSmartinboehme   void beginAnalysis(const AdornedCFG &ACFG,
179a443b3d1SSam McCall                      TypeErasedDataflowAnalysis &A) override {
180a443b3d1SSam McCall     OS = Streams();
18159ff3adcSmartinboehme     this->ACFG = &ACFG;
182a443b3d1SSam McCall     *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
183a443b3d1SSam McCall 
18459ff3adcSmartinboehme     BlockConverged.resize(ACFG.getCFG().getNumBlockIDs());
18582324bc9Smartinboehme 
18659ff3adcSmartinboehme     const auto &D = ACFG.getDecl();
187a443b3d1SSam McCall     const auto &SM = A.getASTContext().getSourceManager();
188a443b3d1SSam McCall     *OS << "<title>";
189e6cd409fSMartin Braenne     if (const auto *ND = dyn_cast<NamedDecl>(&D))
190a443b3d1SSam McCall       *OS << ND->getNameAsString() << " at ";
191e6cd409fSMartin Braenne     *OS << SM.getFilename(D.getLocation()) << ":"
192e6cd409fSMartin Braenne         << SM.getSpellingLineNumber(D.getLocation());
193a443b3d1SSam McCall     *OS << "</title>\n";
194a443b3d1SSam McCall 
195a443b3d1SSam McCall     *OS << "<style>" << HTMLLogger_css << "</style>\n";
196a443b3d1SSam McCall     *OS << "<script>" << HTMLLogger_js << "</script>\n";
197a443b3d1SSam McCall 
198a443b3d1SSam McCall     writeCode();
19982324bc9Smartinboehme     JOS.objectBegin();
20082324bc9Smartinboehme     JOS.attributeBegin("states");
20182324bc9Smartinboehme     JOS.objectBegin();
202a443b3d1SSam McCall   }
203a443b3d1SSam McCall   // Between beginAnalysis() and endAnalysis() we write all the states for
204a443b3d1SSam McCall   // particular analysis points into the `timeline` array.
205a443b3d1SSam McCall   void endAnalysis() override {
20682324bc9Smartinboehme     JOS.objectEnd();
20782324bc9Smartinboehme     JOS.attributeEnd();
208a443b3d1SSam McCall 
20982324bc9Smartinboehme     JOS.attributeArray("timeline", [&] {
210a443b3d1SSam McCall       for (const auto &E : Iters) {
21182324bc9Smartinboehme         JOS.object([&] {
21282324bc9Smartinboehme           JOS.attribute("block", blockID(E.Block->getBlockID()));
21382324bc9Smartinboehme           JOS.attribute("iter", E.Iter);
21482324bc9Smartinboehme           JOS.attribute("post_visit", E.PostVisit);
21582324bc9Smartinboehme           JOS.attribute("converged", E.Converged);
216a443b3d1SSam McCall         });
217a443b3d1SSam McCall       }
218a443b3d1SSam McCall     });
21982324bc9Smartinboehme     JOS.attributeObject("cfg", [&] {
220a443b3d1SSam McCall       for (const auto &E : BlockIters)
221a443b3d1SSam McCall         writeBlock(*E.first, E.second);
222a443b3d1SSam McCall     });
223a443b3d1SSam McCall 
22482324bc9Smartinboehme     JOS.objectEnd();
22582324bc9Smartinboehme 
22682324bc9Smartinboehme     writeCFG();
22782324bc9Smartinboehme 
22882324bc9Smartinboehme     *OS << "<script>var HTMLLoggerData = \n";
22982324bc9Smartinboehme     *OS << JSON;
230a443b3d1SSam McCall     *OS << ";\n</script>\n";
231a443b3d1SSam McCall     *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
232a443b3d1SSam McCall   }
233a443b3d1SSam McCall 
234ed65ced2Smartinboehme   void enterBlock(const CFGBlock &B, bool PostVisit) override {
2352be7c651Smartinboehme     llvm::SmallVector<size_t> &BIter = BlockIters[&B];
236ed65ced2Smartinboehme     unsigned IterNum = BIter.size() + 1;
2372be7c651Smartinboehme     BIter.push_back(Iters.size());
2382be7c651Smartinboehme     Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false});
23982324bc9Smartinboehme     if (!PostVisit)
24082324bc9Smartinboehme       BlockConverged[B.getBlockID()] = false;
241a443b3d1SSam McCall     ElementIndex = 0;
242a443b3d1SSam McCall   }
243a443b3d1SSam McCall   void enterElement(const CFGElement &E) override {
244a443b3d1SSam McCall     ++ElementIndex;
245a443b3d1SSam McCall   }
246a443b3d1SSam McCall 
247a443b3d1SSam McCall   static std::string blockID(unsigned Block) {
248a443b3d1SSam McCall     return llvm::formatv("B{0}", Block);
249a443b3d1SSam McCall   }
250a443b3d1SSam McCall   static std::string eltID(unsigned Block, unsigned Element) {
251a443b3d1SSam McCall     return llvm::formatv("B{0}.{1}", Block, Element);
252a443b3d1SSam McCall   }
253a443b3d1SSam McCall   static std::string iterID(unsigned Block, unsigned Iter) {
254a443b3d1SSam McCall     return llvm::formatv("B{0}:{1}", Block, Iter);
255a443b3d1SSam McCall   }
256a443b3d1SSam McCall   static std::string elementIterID(unsigned Block, unsigned Iter,
257a443b3d1SSam McCall                                    unsigned Element) {
258a443b3d1SSam McCall     return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
259a443b3d1SSam McCall   }
260a443b3d1SSam McCall 
261a443b3d1SSam McCall   // Write the analysis state associated with a particular analysis point.
262a443b3d1SSam McCall   // FIXME: this dump is fairly opaque. We should show:
263a443b3d1SSam McCall   //  - values associated with the current Stmt
264a443b3d1SSam McCall   //  - values associated with its children
265a443b3d1SSam McCall   //  - meaningful names for values
266a443b3d1SSam McCall   //  - which boolean values are implied true/false by the flow condition
267a443b3d1SSam McCall   void recordState(TypeErasedDataflowAnalysisState &State) override {
268ed65ced2Smartinboehme     unsigned Block = Iters.back().Block->getBlockID();
269ed65ced2Smartinboehme     unsigned Iter = Iters.back().Iter;
270ed65ced2Smartinboehme     bool PostVisit = Iters.back().PostVisit;
27182324bc9Smartinboehme     JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
27282324bc9Smartinboehme       JOS.attribute("block", blockID(Block));
27382324bc9Smartinboehme       JOS.attribute("iter", Iter);
27482324bc9Smartinboehme       JOS.attribute("post_visit", PostVisit);
27582324bc9Smartinboehme       JOS.attribute("element", ElementIndex);
276b56b15edSSam McCall 
277b56b15edSSam McCall       // If this state immediately follows an Expr, show its built-in model.
278b56b15edSSam McCall       if (ElementIndex > 0) {
279b56b15edSSam McCall         auto S =
280ed65ced2Smartinboehme             Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>();
281f76f6674SMartin Braenne         if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {
282f76f6674SMartin Braenne           if (E->isPRValue()) {
283e8fce958Smartinboehme             if (!E->getType()->isRecordType())
284f76f6674SMartin Braenne               if (auto *V = State.Env.getValue(*E))
28582324bc9Smartinboehme                 JOS.attributeObject(
28682324bc9Smartinboehme                     "value", [&] { ModelDumper(JOS, State.Env).dump(*V); });
287f76f6674SMartin Braenne           } else {
288b244b6aeSMartin Braenne             if (auto *Loc = State.Env.getStorageLocation(*E))
28982324bc9Smartinboehme               JOS.attributeObject(
29082324bc9Smartinboehme                   "value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); });
291b56b15edSSam McCall           }
292f76f6674SMartin Braenne         }
293f76f6674SMartin Braenne       }
294a443b3d1SSam McCall       if (!ContextLogs.empty()) {
29582324bc9Smartinboehme         JOS.attribute("logs", ContextLogs);
296a443b3d1SSam McCall         ContextLogs.clear();
297a443b3d1SSam McCall       }
298a443b3d1SSam McCall       {
299a443b3d1SSam McCall         std::string BuiltinLattice;
300a443b3d1SSam McCall         llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
301a443b3d1SSam McCall         State.Env.dump(BuiltinLatticeS);
30282324bc9Smartinboehme         JOS.attribute("builtinLattice", BuiltinLattice);
303a443b3d1SSam McCall       }
304a443b3d1SSam McCall     });
305a443b3d1SSam McCall   }
30682324bc9Smartinboehme   void blockConverged() override {
30782324bc9Smartinboehme     Iters.back().Converged = true;
30882324bc9Smartinboehme     BlockConverged[Iters.back().Block->getBlockID()] = true;
30982324bc9Smartinboehme   }
310a443b3d1SSam McCall 
311a443b3d1SSam McCall   void logText(llvm::StringRef S) override {
312a443b3d1SSam McCall     ContextLogs.append(S.begin(), S.end());
313a443b3d1SSam McCall     ContextLogs.push_back('\n');
314a443b3d1SSam McCall   }
315a443b3d1SSam McCall 
316a443b3d1SSam McCall private:
317a443b3d1SSam McCall   // Write the CFG block details.
318a443b3d1SSam McCall   // Currently this is just the list of elements in execution order.
319a443b3d1SSam McCall   // FIXME: an AST dump would be a useful view, too.
3202be7c651Smartinboehme   void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) {
32182324bc9Smartinboehme     JOS.attributeObject(blockID(B.getBlockID()), [&] {
32282324bc9Smartinboehme       JOS.attributeArray("iters", [&] {
3232be7c651Smartinboehme         for (size_t IterIdx : ItersForB) {
3242be7c651Smartinboehme           const Iteration &Iter = Iters[IterIdx];
32582324bc9Smartinboehme           JOS.object([&] {
32682324bc9Smartinboehme             JOS.attribute("iter", Iter.Iter);
32782324bc9Smartinboehme             JOS.attribute("post_visit", Iter.PostVisit);
32882324bc9Smartinboehme             JOS.attribute("converged", Iter.Converged);
329ed65ced2Smartinboehme           });
330ed65ced2Smartinboehme         }
331ed65ced2Smartinboehme       });
33282324bc9Smartinboehme       JOS.attributeArray("elements", [&] {
333a443b3d1SSam McCall         for (const auto &Elt : B.Elements) {
334a443b3d1SSam McCall           std::string Dump;
335a443b3d1SSam McCall           llvm::raw_string_ostream DumpS(Dump);
336a443b3d1SSam McCall           Elt.dumpToStream(DumpS);
33782324bc9Smartinboehme           JOS.value(Dump);
338a443b3d1SSam McCall         }
339a443b3d1SSam McCall       });
340a443b3d1SSam McCall     });
341a443b3d1SSam McCall   }
342a443b3d1SSam McCall 
343a443b3d1SSam McCall   // Write the code of function being examined.
344a443b3d1SSam McCall   // We want to overlay the code with <span>s that mark which BB particular
345a443b3d1SSam McCall   // tokens are associated with, and even which BB element (so that clicking
346a443b3d1SSam McCall   // can select the right element).
347a443b3d1SSam McCall   void writeCode() {
34859ff3adcSmartinboehme     const auto &AST = ACFG->getDecl().getASTContext();
349a443b3d1SSam McCall     bool Invalid = false;
350a443b3d1SSam McCall 
351a443b3d1SSam McCall     // Extract the source code from the original file.
352a443b3d1SSam McCall     // Pretty-printing from the AST would probably be nicer (no macros or
353a443b3d1SSam McCall     // indentation to worry about), but we need the boundaries of particular
354a443b3d1SSam McCall     // AST nodes and the printer doesn't provide this.
355a443b3d1SSam McCall     auto Range = clang::Lexer::makeFileCharRange(
35659ff3adcSmartinboehme         CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()),
357a443b3d1SSam McCall         AST.getSourceManager(), AST.getLangOpts());
358a443b3d1SSam McCall     if (Range.isInvalid())
359a443b3d1SSam McCall       return;
360a443b3d1SSam McCall     llvm::StringRef Code = clang::Lexer::getSourceText(
361a443b3d1SSam McCall         Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
362a443b3d1SSam McCall     if (Invalid)
363a443b3d1SSam McCall       return;
364a443b3d1SSam McCall 
365a443b3d1SSam McCall     // TokenInfo stores the BB and set of elements that a token is part of.
366a443b3d1SSam McCall     struct TokenInfo {
36746a56931SKazu Hirata       enum : unsigned { Missing = static_cast<unsigned>(-1) };
36811dfb3cbSAaron Ballman 
369a443b3d1SSam McCall       // The basic block this is part of.
370a443b3d1SSam McCall       // This is the BB of the stmt with the smallest containing range.
371a443b3d1SSam McCall       unsigned BB = Missing;
372a443b3d1SSam McCall       unsigned BBPriority = 0;
373a443b3d1SSam McCall       // The most specific stmt this is part of (smallest range).
374a443b3d1SSam McCall       unsigned Elt = Missing;
375a443b3d1SSam McCall       unsigned EltPriority = 0;
376a443b3d1SSam McCall       // All stmts this is part of.
377a443b3d1SSam McCall       SmallVector<unsigned> Elts;
378a443b3d1SSam McCall 
379a443b3d1SSam McCall       // Mark this token as being part of BB.Elt.
380a443b3d1SSam McCall       // RangeLen is the character length of the element's range, used to
381a443b3d1SSam McCall       // distinguish inner vs outer statements.
382a443b3d1SSam McCall       // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
383a443b3d1SSam McCall       // However "a" has a smaller range, so is more specific. Clicking on the
384a443b3d1SSam McCall       // token "a" should select the stmt "a".
385a443b3d1SSam McCall       void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
386a443b3d1SSam McCall         // A worse BB (larger range) => ignore.
387a443b3d1SSam McCall         if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
388a443b3d1SSam McCall           return;
389a443b3d1SSam McCall         if (BB != this->BB) {
390a443b3d1SSam McCall           this->BB = BB;
391a443b3d1SSam McCall           Elts.clear();
392a443b3d1SSam McCall           BBPriority = RangeLen;
393a443b3d1SSam McCall         }
394a443b3d1SSam McCall         BBPriority = std::min(BBPriority, RangeLen);
395a443b3d1SSam McCall         Elts.push_back(Elt);
396a443b3d1SSam McCall         if (this->Elt == Missing || EltPriority > RangeLen)
397a443b3d1SSam McCall           this->Elt = Elt;
398a443b3d1SSam McCall       }
399a443b3d1SSam McCall       bool operator==(const TokenInfo &Other) const {
400a443b3d1SSam McCall         return std::tie(BB, Elt, Elts) ==
401a443b3d1SSam McCall                std::tie(Other.BB, Other.Elt, Other.Elts);
402a443b3d1SSam McCall       }
403a443b3d1SSam McCall       // Write the attributes for the <span> on this token.
404a443b3d1SSam McCall       void write(llvm::raw_ostream &OS) const {
405a443b3d1SSam McCall         OS << "class='c";
406a443b3d1SSam McCall         if (BB != Missing)
407a443b3d1SSam McCall           OS << " " << blockID(BB);
408a443b3d1SSam McCall         for (unsigned Elt : Elts)
409a443b3d1SSam McCall           OS << " " << eltID(BB, Elt);
410a443b3d1SSam McCall         OS << "'";
411a443b3d1SSam McCall 
412a443b3d1SSam McCall         if (Elt != Missing)
413a443b3d1SSam McCall           OS << " data-elt='" << eltID(BB, Elt) << "'";
414a443b3d1SSam McCall         if (BB != Missing)
415a443b3d1SSam McCall           OS << " data-bb='" << blockID(BB) << "'";
416a443b3d1SSam McCall       }
417a443b3d1SSam McCall     };
418a443b3d1SSam McCall 
419a443b3d1SSam McCall     // Construct one TokenInfo per character in a flat array.
420a443b3d1SSam McCall     // This is inefficient (chars in a token all have the same info) but simple.
421a443b3d1SSam McCall     std::vector<TokenInfo> State(Code.size());
42259ff3adcSmartinboehme     for (const auto *Block : ACFG->getCFG()) {
423a443b3d1SSam McCall       unsigned EltIndex = 0;
424a443b3d1SSam McCall       for (const auto& Elt : *Block) {
425a443b3d1SSam McCall         ++EltIndex;
426a443b3d1SSam McCall         if (const auto S = Elt.getAs<CFGStmt>()) {
427a443b3d1SSam McCall           auto EltRange = clang::Lexer::makeFileCharRange(
428a443b3d1SSam McCall               CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
429a443b3d1SSam McCall               AST.getSourceManager(), AST.getLangOpts());
430a443b3d1SSam McCall           if (EltRange.isInvalid())
431a443b3d1SSam McCall             continue;
432a443b3d1SSam McCall           if (EltRange.getBegin() < Range.getBegin() ||
433a443b3d1SSam McCall               EltRange.getEnd() >= Range.getEnd() ||
434a443b3d1SSam McCall               EltRange.getEnd() < Range.getBegin() ||
435a443b3d1SSam McCall               EltRange.getEnd() >= Range.getEnd())
436a443b3d1SSam McCall             continue;
437a443b3d1SSam McCall           unsigned Off = EltRange.getBegin().getRawEncoding() -
438a443b3d1SSam McCall                          Range.getBegin().getRawEncoding();
439a443b3d1SSam McCall           unsigned Len = EltRange.getEnd().getRawEncoding() -
440a443b3d1SSam McCall                          EltRange.getBegin().getRawEncoding();
441a443b3d1SSam McCall           for (unsigned I = 0; I < Len; ++I)
442a443b3d1SSam McCall             State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
443a443b3d1SSam McCall         }
444a443b3d1SSam McCall       }
445a443b3d1SSam McCall     }
446a443b3d1SSam McCall 
447a443b3d1SSam McCall     // Finally, write the code with the correct <span>s.
448a443b3d1SSam McCall     unsigned Line =
449a443b3d1SSam McCall         AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
450a443b3d1SSam McCall     *OS << "<template data-copy='code'>\n";
451a443b3d1SSam McCall     *OS << "<code class='filename'>";
452a443b3d1SSam McCall     llvm::printHTMLEscaped(
453a443b3d1SSam McCall         llvm::sys::path::filename(
454a443b3d1SSam McCall             AST.getSourceManager().getFilename(Range.getBegin())),
455a443b3d1SSam McCall         *OS);
456a443b3d1SSam McCall     *OS << "</code>";
457a443b3d1SSam McCall     *OS << "<code class='line' data-line='" << Line++ << "'>";
458a443b3d1SSam McCall     for (unsigned I = 0; I < Code.size(); ++I) {
459a443b3d1SSam McCall       // Don't actually write a <span> around each character, only break spans
460a443b3d1SSam McCall       // when the TokenInfo changes.
461a443b3d1SSam McCall       bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
462a443b3d1SSam McCall       bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
463a443b3d1SSam McCall       if (NeedOpen) {
464a443b3d1SSam McCall         *OS << "<span ";
465a443b3d1SSam McCall         State[I].write(*OS);
466a443b3d1SSam McCall         *OS << ">";
467a443b3d1SSam McCall       }
468a443b3d1SSam McCall       if (Code[I] == '\n')
469a443b3d1SSam McCall         *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
470a443b3d1SSam McCall       else
471a443b3d1SSam McCall         llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
472a443b3d1SSam McCall       if (NeedClose) *OS << "</span>";
473a443b3d1SSam McCall     }
474a443b3d1SSam McCall     *OS << "</code>\n";
475a443b3d1SSam McCall     *OS << "</template>";
476a443b3d1SSam McCall   }
477a443b3d1SSam McCall 
478a443b3d1SSam McCall   // Write the CFG diagram, a graph of basic blocks.
479a443b3d1SSam McCall   // Laying out graphs is hard, so we construct a graphviz description and shell
480a443b3d1SSam McCall   // out to `dot` to turn it into an SVG.
481a443b3d1SSam McCall   void writeCFG() {
482a443b3d1SSam McCall     *OS << "<template data-copy='cfg'>\n";
48359ff3adcSmartinboehme     if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG())))
484a443b3d1SSam McCall       *OS << *SVG;
485a443b3d1SSam McCall     else
486a443b3d1SSam McCall       *OS << "Can't draw CFG: " << toString(SVG.takeError());
487a443b3d1SSam McCall     *OS << "</template>\n";
488a443b3d1SSam McCall   }
489a443b3d1SSam McCall 
490a443b3d1SSam McCall   // Produce a graphviz description of a CFG.
49182324bc9Smartinboehme   std::string buildCFGDot(const clang::CFG &CFG) {
492a443b3d1SSam McCall     std::string Graph;
493a443b3d1SSam McCall     llvm::raw_string_ostream GraphS(Graph);
494a443b3d1SSam McCall     // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
495a443b3d1SSam McCall     GraphS << R"(digraph {
496a443b3d1SSam McCall       tooltip=" "
497a443b3d1SSam McCall       node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
498a443b3d1SSam McCall       edge[tooltip = " "]
499a443b3d1SSam McCall )";
50082324bc9Smartinboehme     for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) {
50182324bc9Smartinboehme       std::string Name = blockID(I);
50282324bc9Smartinboehme       // Rightwards arrow, vertical line
503a11ab139Smartinboehme       const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c";
50482324bc9Smartinboehme       if (BlockConverged[I])
50582324bc9Smartinboehme         Name += ConvergenceMarker;
50682324bc9Smartinboehme       GraphS << "  " << blockID(I) << " [id=" << blockID(I) << " label=\""
50782324bc9Smartinboehme              << Name << "\"]\n";
50882324bc9Smartinboehme     }
509a443b3d1SSam McCall     for (const auto *Block : CFG) {
510a443b3d1SSam McCall       for (const auto &Succ : Block->succs()) {
511771d7d71SMartin Braenne         if (Succ.getReachableBlock())
512a443b3d1SSam McCall           GraphS << "  " << blockID(Block->getBlockID()) << " -> "
513a443b3d1SSam McCall                  << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
514a443b3d1SSam McCall       }
515a443b3d1SSam McCall     }
516a443b3d1SSam McCall     GraphS << "}\n";
517a443b3d1SSam McCall     return Graph;
518a443b3d1SSam McCall   }
519a443b3d1SSam McCall };
520a443b3d1SSam McCall 
521a443b3d1SSam McCall // Nothing interesting here, just subprocess/temp-file plumbing.
522a443b3d1SSam McCall llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
5230304aa25SSam McCall   std::string DotPath;
5240304aa25SSam McCall   if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))
5250304aa25SSam McCall     DotPath = FromEnv;
5260304aa25SSam McCall   else {
5270304aa25SSam McCall     auto FromPath = llvm::sys::findProgramByName("dot");
5280304aa25SSam McCall     if (!FromPath)
5290304aa25SSam McCall       return llvm::createStringError(FromPath.getError(),
5300304aa25SSam McCall                                      "'dot' not found on PATH");
5310304aa25SSam McCall     DotPath = FromPath.get();
5320304aa25SSam McCall   }
533a443b3d1SSam McCall 
534a443b3d1SSam McCall   // Create input and output files for `dot` subprocess.
535a443b3d1SSam McCall   // (We create the output file as empty, to reserve the temp filename).
536a443b3d1SSam McCall   llvm::SmallString<256> Input, Output;
537a443b3d1SSam McCall   int InputFD;
538a443b3d1SSam McCall   if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
539a443b3d1SSam McCall                                                    Input))
540a443b3d1SSam McCall     return llvm::createStringError(EC, "failed to create `dot` temp input");
541a443b3d1SSam McCall   llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
542a443b3d1SSam McCall   auto DeleteInput =
543a443b3d1SSam McCall       llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
544a443b3d1SSam McCall   if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
545a443b3d1SSam McCall     return llvm::createStringError(EC, "failed to create `dot` temp output");
546a443b3d1SSam McCall   auto DeleteOutput =
547a443b3d1SSam McCall       llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
548a443b3d1SSam McCall 
549a443b3d1SSam McCall   std::vector<std::optional<llvm::StringRef>> Redirects = {
550a443b3d1SSam McCall       Input, Output,
551a443b3d1SSam McCall       /*stderr=*/std::nullopt};
552a443b3d1SSam McCall   std::string ErrMsg;
553a443b3d1SSam McCall   int Code = llvm::sys::ExecuteAndWait(
5540304aa25SSam McCall       DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
555a443b3d1SSam McCall       /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
556a443b3d1SSam McCall   if (!ErrMsg.empty())
557a443b3d1SSam McCall     return llvm::createStringError(llvm::inconvertibleErrorCode(),
558a443b3d1SSam McCall                                    "'dot' failed: " + ErrMsg);
559a443b3d1SSam McCall   if (Code != 0)
560a443b3d1SSam McCall     return llvm::createStringError(llvm::inconvertibleErrorCode(),
561a443b3d1SSam McCall                                    "'dot' failed (" + llvm::Twine(Code) + ")");
562a443b3d1SSam McCall 
563a443b3d1SSam McCall   auto Buf = llvm::MemoryBuffer::getFile(Output);
564a443b3d1SSam McCall   if (!Buf)
565a443b3d1SSam McCall     return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
566a443b3d1SSam McCall 
567a443b3d1SSam McCall   // Output has <?xml> prefix we don't want. Skip to <svg> tag.
568a443b3d1SSam McCall   llvm::StringRef Result = Buf.get()->getBuffer();
569a443b3d1SSam McCall   auto Pos = Result.find("<svg");
570a443b3d1SSam McCall   if (Pos == llvm::StringRef::npos)
571a443b3d1SSam McCall     return llvm::createStringError(llvm::inconvertibleErrorCode(),
572a443b3d1SSam McCall                                    "Can't find <svg> tag in `dot` output");
573a443b3d1SSam McCall   return Result.substr(Pos).str();
574a443b3d1SSam McCall }
575a443b3d1SSam McCall 
576a443b3d1SSam McCall } // namespace
577a443b3d1SSam McCall 
578a443b3d1SSam McCall std::unique_ptr<Logger>
579a443b3d1SSam McCall Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
580a443b3d1SSam McCall   return std::make_unique<HTMLLogger>(std::move(Streams));
581a443b3d1SSam McCall }
582a443b3d1SSam McCall 
583a443b3d1SSam McCall } // namespace clang::dataflow
584