xref: /llvm-project/clang/lib/Analysis/FlowSensitive/HTMLLogger.cpp (revision 564fd62aedfde6358baa1776a2de975b45bc7778)
1 //===-- HTMLLogger.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the HTML logger. Given a directory dir/, we write
10 // dir/0.html for the first analysis, etc.
11 // These files contain a visualization that allows inspecting the CFG and the
12 // state of the analysis at each point.
13 // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
14 // so each output file is self-contained.
15 //
16 // VIEWS
17 //
18 // The timeline and function view are always shown. These allow selecting basic
19 // blocks, statements within them, and processing iterations (BBs are visited
20 // multiple times when e.g. loops are involved).
21 // These are written directly into the HTML body.
22 //
23 // There are also listings of particular basic blocks, and dumps of the state
24 // at particular analysis points (i.e. BB2 iteration 3 statement 2).
25 // These are only shown when the relevant BB/analysis point is *selected*.
26 //
27 // DATA AND TEMPLATES
28 //
29 // The HTML proper is mostly static.
30 // The analysis data is in a JSON object HTMLLoggerData which is embedded as
31 // a <script> in the <head>.
32 // This gets rendered into DOM by a simple template processor which substitutes
33 // the data into <template> tags embedded in the HTML. (see inflate() in JS).
34 //
35 // SELECTION
36 //
37 // This is the only real interactive mechanism.
38 //
39 // At any given time, there are several named selections, e.g.:
40 //   bb: B2               (basic block 0 is selected)
41 //   elt: B2.4            (statement 4 is selected)
42 //   iter: B2:1           (iteration 1 of the basic block is selected)
43 //   hover: B3            (hovering over basic block 3)
44 //
45 // The selection is updated by mouse events: hover by moving the mouse and
46 // others by clicking. Elements that are click targets generally have attributes
47 // (id or data-foo) that define what they should select.
48 // See watchSelection() in JS for the exact logic.
49 //
50 // When the "bb" selection is set to "B2":
51 //   - sections <section data-selection="bb"> get shown
52 //   - templates under such sections get re-rendered
53 //   - elements with class/id "B2" get class "bb-select"
54 //
55 //===----------------------------------------------------------------------===//
56 
57 #include "clang/Analysis/FlowSensitive/AdornedCFG.h"
58 #include "clang/Analysis/FlowSensitive/DebugSupport.h"
59 #include "clang/Analysis/FlowSensitive/Logger.h"
60 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
61 #include "clang/Analysis/FlowSensitive/Value.h"
62 #include "clang/Basic/SourceManager.h"
63 #include "clang/Lex/Lexer.h"
64 #include "llvm/ADT/DenseMap.h"
65 #include "llvm/ADT/ScopeExit.h"
66 #include "llvm/Support/Error.h"
67 #include "llvm/Support/FormatVariadic.h"
68 #include "llvm/Support/JSON.h"
69 #include "llvm/Support/Program.h"
70 #include "llvm/Support/ScopedPrinter.h"
71 #include "llvm/Support/raw_ostream.h"
72 // Defines assets: HTMLLogger_{html_js,css}
73 #include "HTMLLogger.inc"
74 
75 namespace clang::dataflow {
76 namespace {
77 
78 // Render a graphviz graph specification to SVG using the `dot` tool.
79 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph);
80 
81 using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>;
82 
83 // Recursively dumps Values/StorageLocations as JSON
84 class ModelDumper {
85 public:
86   ModelDumper(llvm::json::OStream &JOS, const Environment &Env)
87       : JOS(JOS), Env(Env) {}
88 
89   void dump(Value &V) {
90     JOS.attribute("value_id", llvm::to_string(&V));
91     if (!Visited.insert(&V).second)
92       return;
93 
94     JOS.attribute("kind", debugString(V.getKind()));
95 
96     switch (V.getKind()) {
97     case Value::Kind::Integer:
98     case Value::Kind::TopBool:
99     case Value::Kind::AtomicBool:
100     case Value::Kind::FormulaBool:
101       break;
102     case Value::Kind::Pointer:
103       JOS.attributeObject(
104           "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); });
105       break;
106     }
107 
108     for (const auto& Prop : V.properties())
109       JOS.attributeObject(("p:" + Prop.first()).str(),
110                           [&] { dump(*Prop.second); });
111 
112     // Running the SAT solver is expensive, but knowing which booleans are
113     // guaranteed true/false here is valuable and hard to determine by hand.
114     if (auto *B = llvm::dyn_cast<BoolValue>(&V)) {
115       JOS.attribute("formula", llvm::to_string(B->formula()));
116       JOS.attribute("truth", Env.proves(B->formula()) ? "true"
117                              : Env.proves(Env.arena().makeNot(B->formula()))
118                                  ? "false"
119                                  : "unknown");
120     }
121   }
122   void dump(const StorageLocation &L) {
123     JOS.attribute("location", llvm::to_string(&L));
124     if (!Visited.insert(&L).second)
125       return;
126 
127     JOS.attribute("type", L.getType().getAsString());
128     if (!L.getType()->isRecordType())
129       if (auto *V = Env.getValue(L))
130         dump(*V);
131 
132     if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) {
133       for (const auto &Child : RLoc->children())
134         JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] {
135           if (Child.second)
136             dump(*Child.second);
137         });
138 
139       for (const auto &SyntheticField : RLoc->synthetic_fields())
140         JOS.attributeObject(("sf:" + SyntheticField.first()).str(),
141                             [&] { dump(*SyntheticField.second); });
142     }
143   }
144 
145   llvm::DenseSet<const void*> Visited;
146   llvm::json::OStream &JOS;
147   const Environment &Env;
148 };
149 
150 class HTMLLogger : public Logger {
151   struct Iteration {
152     const CFGBlock *Block;
153     unsigned Iter;
154     bool PostVisit;
155     bool Converged;
156   };
157 
158   StreamFactory Streams;
159   std::unique_ptr<llvm::raw_ostream> OS;
160   std::string JSON;
161   llvm::raw_string_ostream JStringStream{JSON};
162   llvm::json::OStream JOS{JStringStream, /*Indent=*/2};
163 
164   const AdornedCFG *ACFG;
165   // Timeline of iterations of CFG block visitation.
166   std::vector<Iteration> Iters;
167   // Indexes  in `Iters` of the iterations for each block.
168   llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters;
169   // For a given block ID, did the block converge (on the last iteration)?
170   llvm::BitVector BlockConverged;
171   // The messages logged in the current context but not yet written.
172   std::string ContextLogs;
173   // The number of elements we have visited within the current CFG block.
174   unsigned ElementIndex;
175 
176 public:
177   explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {}
178   void beginAnalysis(const AdornedCFG &ACFG,
179                      TypeErasedDataflowAnalysis &A) override {
180     OS = Streams();
181     this->ACFG = &ACFG;
182     *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first;
183 
184     BlockConverged.resize(ACFG.getCFG().getNumBlockIDs());
185 
186     const auto &D = ACFG.getDecl();
187     const auto &SM = A.getASTContext().getSourceManager();
188     *OS << "<title>";
189     if (const auto *ND = dyn_cast<NamedDecl>(&D))
190       *OS << ND->getNameAsString() << " at ";
191     *OS << SM.getFilename(D.getLocation()) << ":"
192         << SM.getSpellingLineNumber(D.getLocation());
193     *OS << "</title>\n";
194 
195     *OS << "<style>" << HTMLLogger_css << "</style>\n";
196     *OS << "<script>" << HTMLLogger_js << "</script>\n";
197 
198     writeCode();
199     JOS.objectBegin();
200     JOS.attributeBegin("states");
201     JOS.objectBegin();
202   }
203   // Between beginAnalysis() and endAnalysis() we write all the states for
204   // particular analysis points into the `timeline` array.
205   void endAnalysis() override {
206     JOS.objectEnd();
207     JOS.attributeEnd();
208 
209     JOS.attributeArray("timeline", [&] {
210       for (const auto &E : Iters) {
211         JOS.object([&] {
212           JOS.attribute("block", blockID(E.Block->getBlockID()));
213           JOS.attribute("iter", E.Iter);
214           JOS.attribute("post_visit", E.PostVisit);
215           JOS.attribute("converged", E.Converged);
216         });
217       }
218     });
219     JOS.attributeObject("cfg", [&] {
220       for (const auto &E : BlockIters)
221         writeBlock(*E.first, E.second);
222     });
223 
224     JOS.objectEnd();
225 
226     writeCFG();
227 
228     *OS << "<script>var HTMLLoggerData = \n";
229     *OS << JSON;
230     *OS << ";\n</script>\n";
231     *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second;
232   }
233 
234   void enterBlock(const CFGBlock &B, bool PostVisit) override {
235     llvm::SmallVector<size_t> &BIter = BlockIters[&B];
236     unsigned IterNum = BIter.size() + 1;
237     BIter.push_back(Iters.size());
238     Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false});
239     if (!PostVisit)
240       BlockConverged[B.getBlockID()] = false;
241     ElementIndex = 0;
242   }
243   void enterElement(const CFGElement &E) override {
244     ++ElementIndex;
245   }
246 
247   static std::string blockID(unsigned Block) {
248     return llvm::formatv("B{0}", Block);
249   }
250   static std::string eltID(unsigned Block, unsigned Element) {
251     return llvm::formatv("B{0}.{1}", Block, Element);
252   }
253   static std::string iterID(unsigned Block, unsigned Iter) {
254     return llvm::formatv("B{0}:{1}", Block, Iter);
255   }
256   static std::string elementIterID(unsigned Block, unsigned Iter,
257                                    unsigned Element) {
258     return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element);
259   }
260 
261   // Write the analysis state associated with a particular analysis point.
262   // FIXME: this dump is fairly opaque. We should show:
263   //  - values associated with the current Stmt
264   //  - values associated with its children
265   //  - meaningful names for values
266   //  - which boolean values are implied true/false by the flow condition
267   void recordState(TypeErasedDataflowAnalysisState &State) override {
268     unsigned Block = Iters.back().Block->getBlockID();
269     unsigned Iter = Iters.back().Iter;
270     bool PostVisit = Iters.back().PostVisit;
271     JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] {
272       JOS.attribute("block", blockID(Block));
273       JOS.attribute("iter", Iter);
274       JOS.attribute("post_visit", PostVisit);
275       JOS.attribute("element", ElementIndex);
276 
277       // If this state immediately follows an Expr, show its built-in model.
278       if (ElementIndex > 0) {
279         auto S =
280             Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>();
281         if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) {
282           if (E->isPRValue()) {
283             if (!E->getType()->isRecordType())
284               if (auto *V = State.Env.getValue(*E))
285                 JOS.attributeObject(
286                     "value", [&] { ModelDumper(JOS, State.Env).dump(*V); });
287           } else {
288             if (auto *Loc = State.Env.getStorageLocation(*E))
289               JOS.attributeObject(
290                   "value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); });
291           }
292         }
293       }
294       if (!ContextLogs.empty()) {
295         JOS.attribute("logs", ContextLogs);
296         ContextLogs.clear();
297       }
298       {
299         std::string BuiltinLattice;
300         llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice);
301         State.Env.dump(BuiltinLatticeS);
302         JOS.attribute("builtinLattice", BuiltinLattice);
303       }
304     });
305   }
306   void blockConverged() override {
307     Iters.back().Converged = true;
308     BlockConverged[Iters.back().Block->getBlockID()] = true;
309   }
310 
311   void logText(llvm::StringRef S) override {
312     ContextLogs.append(S.begin(), S.end());
313     ContextLogs.push_back('\n');
314   }
315 
316 private:
317   // Write the CFG block details.
318   // Currently this is just the list of elements in execution order.
319   // FIXME: an AST dump would be a useful view, too.
320   void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) {
321     JOS.attributeObject(blockID(B.getBlockID()), [&] {
322       JOS.attributeArray("iters", [&] {
323         for (size_t IterIdx : ItersForB) {
324           const Iteration &Iter = Iters[IterIdx];
325           JOS.object([&] {
326             JOS.attribute("iter", Iter.Iter);
327             JOS.attribute("post_visit", Iter.PostVisit);
328             JOS.attribute("converged", Iter.Converged);
329           });
330         }
331       });
332       JOS.attributeArray("elements", [&] {
333         for (const auto &Elt : B.Elements) {
334           std::string Dump;
335           llvm::raw_string_ostream DumpS(Dump);
336           Elt.dumpToStream(DumpS);
337           JOS.value(Dump);
338         }
339       });
340     });
341   }
342 
343   // Write the code of function being examined.
344   // We want to overlay the code with <span>s that mark which BB particular
345   // tokens are associated with, and even which BB element (so that clicking
346   // can select the right element).
347   void writeCode() {
348     const auto &AST = ACFG->getDecl().getASTContext();
349     bool Invalid = false;
350 
351     // Extract the source code from the original file.
352     // Pretty-printing from the AST would probably be nicer (no macros or
353     // indentation to worry about), but we need the boundaries of particular
354     // AST nodes and the printer doesn't provide this.
355     auto Range = clang::Lexer::makeFileCharRange(
356         CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()),
357         AST.getSourceManager(), AST.getLangOpts());
358     if (Range.isInvalid())
359       return;
360     llvm::StringRef Code = clang::Lexer::getSourceText(
361         Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid);
362     if (Invalid)
363       return;
364 
365     // TokenInfo stores the BB and set of elements that a token is part of.
366     struct TokenInfo {
367       enum : unsigned { Missing = static_cast<unsigned>(-1) };
368 
369       // The basic block this is part of.
370       // This is the BB of the stmt with the smallest containing range.
371       unsigned BB = Missing;
372       unsigned BBPriority = 0;
373       // The most specific stmt this is part of (smallest range).
374       unsigned Elt = Missing;
375       unsigned EltPriority = 0;
376       // All stmts this is part of.
377       SmallVector<unsigned> Elts;
378 
379       // Mark this token as being part of BB.Elt.
380       // RangeLen is the character length of the element's range, used to
381       // distinguish inner vs outer statements.
382       // For example in `a==0`, token "a" is part of the stmts "a" and "a==0".
383       // However "a" has a smaller range, so is more specific. Clicking on the
384       // token "a" should select the stmt "a".
385       void assign(unsigned BB, unsigned Elt, unsigned RangeLen) {
386         // A worse BB (larger range) => ignore.
387         if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen)
388           return;
389         if (BB != this->BB) {
390           this->BB = BB;
391           Elts.clear();
392           BBPriority = RangeLen;
393         }
394         BBPriority = std::min(BBPriority, RangeLen);
395         Elts.push_back(Elt);
396         if (this->Elt == Missing || EltPriority > RangeLen)
397           this->Elt = Elt;
398       }
399       bool operator==(const TokenInfo &Other) const {
400         return std::tie(BB, Elt, Elts) ==
401                std::tie(Other.BB, Other.Elt, Other.Elts);
402       }
403       // Write the attributes for the <span> on this token.
404       void write(llvm::raw_ostream &OS) const {
405         OS << "class='c";
406         if (BB != Missing)
407           OS << " " << blockID(BB);
408         for (unsigned Elt : Elts)
409           OS << " " << eltID(BB, Elt);
410         OS << "'";
411 
412         if (Elt != Missing)
413           OS << " data-elt='" << eltID(BB, Elt) << "'";
414         if (BB != Missing)
415           OS << " data-bb='" << blockID(BB) << "'";
416       }
417     };
418 
419     // Construct one TokenInfo per character in a flat array.
420     // This is inefficient (chars in a token all have the same info) but simple.
421     std::vector<TokenInfo> State(Code.size());
422     for (const auto *Block : ACFG->getCFG()) {
423       unsigned EltIndex = 0;
424       for (const auto& Elt : *Block) {
425         ++EltIndex;
426         if (const auto S = Elt.getAs<CFGStmt>()) {
427           auto EltRange = clang::Lexer::makeFileCharRange(
428               CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()),
429               AST.getSourceManager(), AST.getLangOpts());
430           if (EltRange.isInvalid())
431             continue;
432           if (EltRange.getBegin() < Range.getBegin() ||
433               EltRange.getEnd() >= Range.getEnd() ||
434               EltRange.getEnd() < Range.getBegin() ||
435               EltRange.getEnd() >= Range.getEnd())
436             continue;
437           unsigned Off = EltRange.getBegin().getRawEncoding() -
438                          Range.getBegin().getRawEncoding();
439           unsigned Len = EltRange.getEnd().getRawEncoding() -
440                          EltRange.getBegin().getRawEncoding();
441           for (unsigned I = 0; I < Len; ++I)
442             State[Off + I].assign(Block->getBlockID(), EltIndex, Len);
443         }
444       }
445     }
446 
447     // Finally, write the code with the correct <span>s.
448     unsigned Line =
449         AST.getSourceManager().getSpellingLineNumber(Range.getBegin());
450     *OS << "<template data-copy='code'>\n";
451     *OS << "<code class='filename'>";
452     llvm::printHTMLEscaped(
453         llvm::sys::path::filename(
454             AST.getSourceManager().getFilename(Range.getBegin())),
455         *OS);
456     *OS << "</code>";
457     *OS << "<code class='line' data-line='" << Line++ << "'>";
458     for (unsigned I = 0; I < Code.size(); ++I) {
459       // Don't actually write a <span> around each character, only break spans
460       // when the TokenInfo changes.
461       bool NeedOpen = I == 0 || !(State[I] == State[I-1]);
462       bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]);
463       if (NeedOpen) {
464         *OS << "<span ";
465         State[I].write(*OS);
466         *OS << ">";
467       }
468       if (Code[I] == '\n')
469         *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>";
470       else
471         llvm::printHTMLEscaped(Code.substr(I, 1), *OS);
472       if (NeedClose) *OS << "</span>";
473     }
474     *OS << "</code>\n";
475     *OS << "</template>";
476   }
477 
478   // Write the CFG diagram, a graph of basic blocks.
479   // Laying out graphs is hard, so we construct a graphviz description and shell
480   // out to `dot` to turn it into an SVG.
481   void writeCFG() {
482     *OS << "<template data-copy='cfg'>\n";
483     if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG())))
484       *OS << *SVG;
485     else
486       *OS << "Can't draw CFG: " << toString(SVG.takeError());
487     *OS << "</template>\n";
488   }
489 
490   // Produce a graphviz description of a CFG.
491   std::string buildCFGDot(const clang::CFG &CFG) {
492     std::string Graph;
493     llvm::raw_string_ostream GraphS(Graph);
494     // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses.
495     GraphS << R"(digraph {
496       tooltip=" "
497       node[class=bb, shape=square, fontname="sans-serif", tooltip=" "]
498       edge[tooltip = " "]
499 )";
500     for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) {
501       std::string Name = blockID(I);
502       // Rightwards arrow, vertical line
503       const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c";
504       if (BlockConverged[I])
505         Name += ConvergenceMarker;
506       GraphS << "  " << blockID(I) << " [id=" << blockID(I) << " label=\""
507              << Name << "\"]\n";
508     }
509     for (const auto *Block : CFG) {
510       for (const auto &Succ : Block->succs()) {
511         if (Succ.getReachableBlock())
512           GraphS << "  " << blockID(Block->getBlockID()) << " -> "
513                  << blockID(Succ.getReachableBlock()->getBlockID()) << "\n";
514       }
515     }
516     GraphS << "}\n";
517     return Graph;
518   }
519 };
520 
521 // Nothing interesting here, just subprocess/temp-file plumbing.
522 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) {
523   std::string DotPath;
524   if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT"))
525     DotPath = FromEnv;
526   else {
527     auto FromPath = llvm::sys::findProgramByName("dot");
528     if (!FromPath)
529       return llvm::createStringError(FromPath.getError(),
530                                      "'dot' not found on PATH");
531     DotPath = FromPath.get();
532   }
533 
534   // Create input and output files for `dot` subprocess.
535   // (We create the output file as empty, to reserve the temp filename).
536   llvm::SmallString<256> Input, Output;
537   int InputFD;
538   if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD,
539                                                    Input))
540     return llvm::createStringError(EC, "failed to create `dot` temp input");
541   llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph;
542   auto DeleteInput =
543       llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); });
544   if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output))
545     return llvm::createStringError(EC, "failed to create `dot` temp output");
546   auto DeleteOutput =
547       llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); });
548 
549   std::vector<std::optional<llvm::StringRef>> Redirects = {
550       Input, Output,
551       /*stderr=*/std::nullopt};
552   std::string ErrMsg;
553   int Code = llvm::sys::ExecuteAndWait(
554       DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects,
555       /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg);
556   if (!ErrMsg.empty())
557     return llvm::createStringError(llvm::inconvertibleErrorCode(),
558                                    "'dot' failed: " + ErrMsg);
559   if (Code != 0)
560     return llvm::createStringError(llvm::inconvertibleErrorCode(),
561                                    "'dot' failed (" + llvm::Twine(Code) + ")");
562 
563   auto Buf = llvm::MemoryBuffer::getFile(Output);
564   if (!Buf)
565     return llvm::createStringError(Buf.getError(), "Can't read `dot` output");
566 
567   // Output has <?xml> prefix we don't want. Skip to <svg> tag.
568   llvm::StringRef Result = Buf.get()->getBuffer();
569   auto Pos = Result.find("<svg");
570   if (Pos == llvm::StringRef::npos)
571     return llvm::createStringError(llvm::inconvertibleErrorCode(),
572                                    "Can't find <svg> tag in `dot` output");
573   return Result.substr(Pos).str();
574 }
575 
576 } // namespace
577 
578 std::unique_ptr<Logger>
579 Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) {
580   return std::make_unique<HTMLLogger>(std::move(Streams));
581 }
582 
583 } // namespace clang::dataflow
584