xref: /llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp (revision f76f6674d8221f59f9e515e3cc03ad07fa72fe46)
1 //===-- HTMLLogger.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the HTML logger. Given a directory dir/, we write
10 // dir/0.html for the first analysis, etc.
11 // These files contain a visualization that allows inspecting the CFG and the
12 // state of the analysis at each point.
13 // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14 // so each output file is self-contained.
15 //
16 // VIEWS
17 //
18 // The timeline and function view are always shown. These allow selecting basic
19 // blocks, statements within them, and processing iterations (BBs are visited
20 // multiple times when e.g. loops are involved).
21 // These are written directly into the HTML body.
22 //
23 // There are also listings of particular basic blocks, and dumps of the state
24 // at particular analysis points (i.e. BB2 iteration 3 statement 2).
25 // These are only shown when the relevant BB/analysis point is *selected*.
26 //
27 // DATA AND TEMPLATES
28 //
29 // The HTML proper is mostly static.
30 // The analysis data is in a JSON object HTMLLoggerData which is embedded as
31 // a <script> in the <head>.
32 // This gets rendered into DOM by a simple template processor which substitutes
33 // the data into <template> tags embedded in the HTML. (see inflate() in JS).
34 //
35 // SELECTION
36 //
37 // This is the only real interactive mechanism.
38 //
39 // At any given time, there are several named selections, e.g.:
40 //   bb: B2               (basic block 0 is selected)
41 //   elt: B2.4            (statement 4 is selected)
42 //   iter: B2:1           (iteration 1 of the basic block is selected)
43 //   hover: B3            (hovering over basic block 3)
44 //
45 // The selection is updated by mouse events: hover by moving the mouse and
46 // others by clicking. Elements that are click targets generally have attributes
47 // (id or data-foo) that define what they should select.
48 // See watchSelection() in JS for the exact logic.
49 //
50 // When the "bb" selection is set to "B2":
51 //   - sections <section data-selection="bb"> get shown
52 //   - templates under such sections get re-rendered
53 //   - elements with class/id "B2" get class "bb-select"
54 //
55 //===----------------------------------------------------------------------===//
56 
57 #include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
58 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
59 #include "clang/Analysis/FlowSensitive/Logger.h"
60 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61 #include "clang/Analysis/FlowSensitive/Value.h"
62 #include "clang/Basic/SourceManager.h"
63 #include "clang/Lex/Lexer.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/ScopeExit.h"
66 #include "llvm/Support/Error.h"
67 #include "llvm/Support/FormatVariadic.h"
68 #include "llvm/Support/JSON.h"
69 #include "llvm/Support/Program.h"
70 #include "llvm/Support/ScopedPrinter.h"
71 #include "llvm/Support/raw_ostream.h"
72 // Defines assets: HTMLLogger_{html_js,css}
73 #include "HTMLLogger.inc"
74 
75 namespace clang::dataflow {
76 namespace {
77 
78 // Render a graphviz graph specification to SVG using the `dot` tool.
79 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
80 
81 using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
82 
83 // Recursively dumps Values/StorageLocations as JSON
84 class ModelDumper {
85 public:
86   ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87       : JOS(JOS), Env(Env) {}
88 
89   void dump(Value &V) {
90     JOS.attribute("value_id", llvm::to_string(&V));
91     if (!Visited.insert(&V).second)
92       return;
93 
94     JOS.attribute("kind", debugString(V.getKind()));
95 
96     switch (V.getKind()) {
97     case Value::Kind::Integer:
98     case Value::Kind::TopBool:
99     case Value::Kind::AtomicBool:
100     case Value::Kind::FormulaBool:
101       break;
102     case Value::Kind::Pointer:
103       JOS.attributeObject(
104           "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
105       break;
106     case Value::Kind::Struct:
107       for (const auto &Child :
108            cast<StructValue>(V).getAggregateLoc().children())
109         JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
110           if (Child.second)
111             if (Value *Val = Env.getValue(*Child.second))
112               dump(*Val);
113         });
114       break;
115     }
116 
117     for (const auto& Prop : V.properties())
118       JOS.attributeObject(("p:" + Prop.first()).str(),
119                           [&] { dump(*Prop.second); });
120 
121     // Running the SAT solver is expensive, but knowing which booleans are
122     // guaranteed true/false here is valuable and hard to determine by hand.
123     if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
124       JOS.attribute("formula", llvm::to_string(B->formula()));
125       JOS.attribute(
126           "truth", Env.flowConditionImplies(B->formula()) ? "true"
127                    : Env.flowConditionImplies(Env.arena().makeNot(B->formula()))
128                        ? "false"
129                        : "unknown");
130     }
131   }
132   void dump(const StorageLocation &L) {
133     JOS.attribute("location", llvm::to_string(&L));
134     if (!Visited.insert(&L).second)
135       return;
136 
137     JOS.attribute("type", L.getType().getAsString());
138     if (auto *V = Env.getValue(L))
139       dump(*V);
140   }
141 
142   llvm::DenseSet<const void*> Visited;
143   llvm::json::OStream &JOS;
144   const Environment &Env;
145 };
146 
147 class HTMLLogger : public Logger {
148   StreamFactory Streams;
149   std::unique_ptr<llvm::raw_ostream> OS;
150   std::optional<llvm::json::OStream> JOS;
151 
152   const ControlFlowContext *CFG;
153   // Timeline of iterations of CFG block visitation.
154   std::vector<std::pair<const CFGBlock *, unsigned>> Iters;
155   // Number of times each CFG block has been seen.
156   llvm::DenseMap<const CFGBlock *, unsigned> BlockIters;
157   // The messages logged in the current context but not yet written.
158   std::string ContextLogs;
159   // The number of elements we have visited within the current CFG block.
160   unsigned ElementIndex;
161 
162 public:
163   explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
164   void beginAnalysis(const ControlFlowContext &CFG,
165                      TypeErasedDataflowAnalysis &A) override {
166     OS = Streams();
167     this->CFG = &CFG;
168     *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
169 
170     if (const auto *D = CFG.getDecl()) {
171       const auto &SM = A.getASTContext().getSourceManager();
172       *OS << "<title>";
173       if (const auto *ND = dyn_cast<NamedDecl>(D))
174         *OS << ND->getNameAsString() << " at ";
175       *OS << SM.getFilename(D->getLocation()) << ":"
176           << SM.getSpellingLineNumber(D->getLocation());
177       *OS << "</title>\n";
178     };
179 
180     *OS << "<style>" << HTMLLogger_css << "</style>\n";
181     *OS << "<script>" << HTMLLogger_js << "</script>\n";
182 
183     writeCode();
184     writeCFG();
185 
186     *OS << "<script>var HTMLLoggerData = \n";
187     JOS.emplace(*OS, /*Indent=*/2);
188     JOS->objectBegin();
189     JOS->attributeBegin("states");
190     JOS->objectBegin();
191   }
192   // Between beginAnalysis() and endAnalysis() we write all the states for
193   // particular analysis points into the `timeline` array.
194   void endAnalysis() override {
195     JOS->objectEnd();
196     JOS->attributeEnd();
197 
198     JOS->attributeArray("timeline", [&] {
199       for (const auto &E : Iters) {
200         JOS->object([&] {
201           JOS->attribute("block", blockID(E.first->getBlockID()));
202           JOS->attribute("iter", E.second);
203         });
204       }
205     });
206     JOS->attributeObject("cfg", [&] {
207       for (const auto &E : BlockIters)
208         writeBlock(*E.first, E.second);
209     });
210 
211     JOS->objectEnd();
212     JOS.reset();
213     *OS << ";\n</script>\n";
214     *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
215   }
216 
217   void enterBlock(const CFGBlock &B) override {
218     Iters.emplace_back(&B, ++BlockIters[&B]);
219     ElementIndex = 0;
220   }
221   void enterElement(const CFGElement &E) override {
222     ++ElementIndex;
223   }
224 
225   static std::string blockID(unsigned Block) {
226     return llvm::formatv("B{0}", Block);
227   }
228   static std::string eltID(unsigned Block, unsigned Element) {
229     return llvm::formatv("B{0}.{1}", Block, Element);
230   }
231   static std::string iterID(unsigned Block, unsigned Iter) {
232     return llvm::formatv("B{0}:{1}", Block, Iter);
233   }
234   static std::string elementIterID(unsigned Block, unsigned Iter,
235                                    unsigned Element) {
236     return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
237   }
238 
239   // Write the analysis state associated with a particular analysis point.
240   // FIXME: this dump is fairly opaque. We should show:
241   //  - values associated with the current Stmt
242   //  - values associated with its children
243   //  - meaningful names for values
244   //  - which boolean values are implied true/false by the flow condition
245   void recordState(TypeErasedDataflowAnalysisState &State) override {
246     unsigned Block = Iters.back().first->getBlockID();
247     unsigned Iter = Iters.back().second;
248     JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
249       JOS->attribute("block", blockID(Block));
250       JOS->attribute("iter", Iter);
251       JOS->attribute("element", ElementIndex);
252 
253       // If this state immediately follows an Expr, show its built-in model.
254       if (ElementIndex > 0) {
255         auto S =
256             Iters.back().first->Elements[ElementIndex - 1].getAs<CFGStmt>();
257         if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {
258           if (E->isPRValue()) {
259             if (auto *V = State.Env.getValue(*E))
260               JOS->attributeObject(
261                   "value", [&] { ModelDumper(*JOS, State.Env).dump(*V); });
262           } else {
263             if (auto *Loc = State.Env.getStorageLocationStrict(*E))
264               JOS->attributeObject(
265                   "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); });
266           }
267         }
268       }
269       if (!ContextLogs.empty()) {
270         JOS->attribute("logs", ContextLogs);
271         ContextLogs.clear();
272       }
273       {
274         std::string BuiltinLattice;
275         llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
276         State.Env.dump(BuiltinLatticeS);
277         JOS->attribute("builtinLattice", BuiltinLattice);
278       }
279     });
280   }
281   void blockConverged() override { logText("Block converged"); }
282 
283   void logText(llvm::StringRef S) override {
284     ContextLogs.append(S.begin(), S.end());
285     ContextLogs.push_back('\n');
286   }
287 
288 private:
289   // Write the CFG block details.
290   // Currently this is just the list of elements in execution order.
291   // FIXME: an AST dump would be a useful view, too.
292   void writeBlock(const CFGBlock &B, unsigned Iters) {
293     JOS->attributeObject(blockID(B.getBlockID()), [&] {
294       JOS->attribute("iters", Iters);
295       JOS->attributeArray("elements", [&] {
296         for (const auto &Elt : B.Elements) {
297           std::string Dump;
298           llvm::raw_string_ostream DumpS(Dump);
299           Elt.dumpToStream(DumpS);
300           JOS->value(Dump);
301         }
302       });
303     });
304   }
305 
306   // Write the code of function being examined.
307   // We want to overlay the code with <span>s that mark which BB particular
308   // tokens are associated with, and even which BB element (so that clicking
309   // can select the right element).
310   void writeCode() {
311     if (!CFG->getDecl())
312       return;
313     const auto &AST = CFG->getDecl()->getASTContext();
314     bool Invalid = false;
315 
316     // Extract the source code from the original file.
317     // Pretty-printing from the AST would probably be nicer (no macros or
318     // indentation to worry about), but we need the boundaries of particular
319     // AST nodes and the printer doesn't provide this.
320     auto Range = clang::Lexer::makeFileCharRange(
321         CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()),
322         AST.getSourceManager(), AST.getLangOpts());
323     if (Range.isInvalid())
324       return;
325     llvm::StringRef Code = clang::Lexer::getSourceText(
326         Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
327     if (Invalid)
328       return;
329 
330     static constexpr unsigned Missing = -1;
331     // TokenInfo stores the BB and set of elements that a token is part of.
332     struct TokenInfo {
333       // The basic block this is part of.
334       // This is the BB of the stmt with the smallest containing range.
335       unsigned BB = Missing;
336       unsigned BBPriority = 0;
337       // The most specific stmt this is part of (smallest range).
338       unsigned Elt = Missing;
339       unsigned EltPriority = 0;
340       // All stmts this is part of.
341       SmallVector<unsigned> Elts;
342 
343       // Mark this token as being part of BB.Elt.
344       // RangeLen is the character length of the element's range, used to
345       // distinguish inner vs outer statements.
346       // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
347       // However "a" has a smaller range, so is more specific. Clicking on the
348       // token "a" should select the stmt "a".
349       void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
350         // A worse BB (larger range) => ignore.
351         if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
352           return;
353         if (BB != this->BB) {
354           this->BB = BB;
355           Elts.clear();
356           BBPriority = RangeLen;
357         }
358         BBPriority = std::min(BBPriority, RangeLen);
359         Elts.push_back(Elt);
360         if (this->Elt == Missing || EltPriority > RangeLen)
361           this->Elt = Elt;
362       }
363       bool operator==(const TokenInfo &Other) const {
364         return std::tie(BB, Elt, Elts) ==
365                std::tie(Other.BB, Other.Elt, Other.Elts);
366       }
367       // Write the attributes for the <span> on this token.
368       void write(llvm::raw_ostream &OS) const {
369         OS << "class='c";
370         if (BB != Missing)
371           OS << " " << blockID(BB);
372         for (unsigned Elt : Elts)
373           OS << " " << eltID(BB, Elt);
374         OS << "'";
375 
376         if (Elt != Missing)
377           OS << " data-elt='" << eltID(BB, Elt) << "'";
378         if (BB != Missing)
379           OS << " data-bb='" << blockID(BB) << "'";
380       }
381     };
382 
383     // Construct one TokenInfo per character in a flat array.
384     // This is inefficient (chars in a token all have the same info) but simple.
385     std::vector<TokenInfo> State(Code.size());
386     for (const auto *Block : CFG->getCFG()) {
387       unsigned EltIndex = 0;
388       for (const auto& Elt : *Block) {
389         ++EltIndex;
390         if (const auto S = Elt.getAs<CFGStmt>()) {
391           auto EltRange = clang::Lexer::makeFileCharRange(
392               CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
393               AST.getSourceManager(), AST.getLangOpts());
394           if (EltRange.isInvalid())
395             continue;
396           if (EltRange.getBegin() < Range.getBegin() ||
397               EltRange.getEnd() >= Range.getEnd() ||
398               EltRange.getEnd() < Range.getBegin() ||
399               EltRange.getEnd() >= Range.getEnd())
400             continue;
401           unsigned Off = EltRange.getBegin().getRawEncoding() -
402                          Range.getBegin().getRawEncoding();
403           unsigned Len = EltRange.getEnd().getRawEncoding() -
404                          EltRange.getBegin().getRawEncoding();
405           for (unsigned I = 0; I < Len; ++I)
406             State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
407         }
408       }
409     }
410 
411     // Finally, write the code with the correct <span>s.
412     unsigned Line =
413         AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
414     *OS << "<template data-copy='code'>\n";
415     *OS << "<code class='filename'>";
416     llvm::printHTMLEscaped(
417         llvm::sys::path::filename(
418             AST.getSourceManager().getFilename(Range.getBegin())),
419         *OS);
420     *OS << "</code>";
421     *OS << "<code class='line' data-line='" << Line++ << "'>";
422     for (unsigned I = 0; I < Code.size(); ++I) {
423       // Don't actually write a <span> around each character, only break spans
424       // when the TokenInfo changes.
425       bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
426       bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
427       if (NeedOpen) {
428         *OS << "<span ";
429         State[I].write(*OS);
430         *OS << ">";
431       }
432       if (Code[I] == '\n')
433         *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
434       else
435         llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
436       if (NeedClose) *OS << "</span>";
437     }
438     *OS << "</code>\n";
439     *OS << "</template>";
440   }
441 
442   // Write the CFG diagram, a graph of basic blocks.
443   // Laying out graphs is hard, so we construct a graphviz description and shell
444   // out to `dot` to turn it into an SVG.
445   void writeCFG() {
446     *OS << "<template data-copy='cfg'>\n";
447     if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG())))
448       *OS << *SVG;
449     else
450       *OS << "Can't draw CFG: " << toString(SVG.takeError());
451     *OS << "</template>\n";
452   }
453 
454   // Produce a graphviz description of a CFG.
455   static std::string buildCFGDot(const clang::CFG &CFG) {
456     std::string Graph;
457     llvm::raw_string_ostream GraphS(Graph);
458     // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
459     GraphS << R"(digraph {
460       tooltip=" "
461       node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
462       edge[tooltip = " "]
463 )";
464     for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I)
465       GraphS << "  " << blockID(I) << " [id=" << blockID(I) << "]\n";
466     for (const auto *Block : CFG) {
467       for (const auto &Succ : Block->succs()) {
468         if (Succ.getReachableBlock())
469           GraphS << "  " << blockID(Block->getBlockID()) << " -> "
470                  << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
471       }
472     }
473     GraphS << "}\n";
474     return Graph;
475   }
476 };
477 
478 // Nothing interesting here, just subprocess/temp-file plumbing.
479 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
480   std::string DotPath;
481   if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))
482     DotPath = FromEnv;
483   else {
484     auto FromPath = llvm::sys::findProgramByName("dot");
485     if (!FromPath)
486       return llvm::createStringError(FromPath.getError(),
487                                      "'dot' not found on PATH");
488     DotPath = FromPath.get();
489   }
490 
491   // Create input and output files for `dot` subprocess.
492   // (We create the output file as empty, to reserve the temp filename).
493   llvm::SmallString<256> Input, Output;
494   int InputFD;
495   if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
496                                                    Input))
497     return llvm::createStringError(EC, "failed to create `dot` temp input");
498   llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
499   auto DeleteInput =
500       llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
501   if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
502     return llvm::createStringError(EC, "failed to create `dot` temp output");
503   auto DeleteOutput =
504       llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
505 
506   std::vector<std::optional<llvm::StringRef>> Redirects = {
507       Input, Output,
508       /*stderr=*/std::nullopt};
509   std::string ErrMsg;
510   int Code = llvm::sys::ExecuteAndWait(
511       DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
512       /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
513   if (!ErrMsg.empty())
514     return llvm::createStringError(llvm::inconvertibleErrorCode(),
515                                    "'dot' failed: " + ErrMsg);
516   if (Code != 0)
517     return llvm::createStringError(llvm::inconvertibleErrorCode(),
518                                    "'dot' failed (" + llvm::Twine(Code) + ")");
519 
520   auto Buf = llvm::MemoryBuffer::getFile(Output);
521   if (!Buf)
522     return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
523 
524   // Output has <?xml> prefix we don't want. Skip to <svg> tag.
525   llvm::StringRef Result = Buf.get()->getBuffer();
526   auto Pos = Result.find("<svg");
527   if (Pos == llvm::StringRef::npos)
528     return llvm::createStringError(llvm::inconvertibleErrorCode(),
529                                    "Can't find <svg> tag in `dot` output");
530   return Result.substr(Pos).str();
531 }
532 
533 } // namespace
534 
535 std::unique_ptr<Logger>
536 Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
537   return std::make_unique<HTMLLogger>(std::move(Streams));
538 }
539 
540 } // namespace clang::dataflow
541