//===-- HTMLLogger.cpp ----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the HTML logger. Given a directory dir/, we write
// dir/0.html for the first analysis, etc.
// These files contain a visualization that allows inspecting the CFG and the
// state of the analysis at each point.
// Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded
// so each output file is self-contained.
//
// VIEWS
//
// The timeline and function view are always shown. These allow selecting basic
// blocks, statements within them, and processing iterations (BBs are visited
// multiple times when e.g. loops are involved).
// These are written directly into the HTML body.
//
// There are also listings of particular basic blocks, and dumps of the state
// at particular analysis points (i.e. BB2 iteration 3 statement 2).
// These are only shown when the relevant BB/analysis point is *selected*.
//
// DATA AND TEMPLATES
//
// The HTML proper is mostly static.
// The analysis data is in a JSON object HTMLLoggerData which is embedded as
// a <script> in the <head>.
// This gets rendered into DOM by a simple template processor which substitutes
// the data into <template> tags embedded in the HTML. (see inflate() in JS).
//
// SELECTION
//
// This is the only real interactive mechanism.
//
// At any given time, there are several named selections, e.g.:
//   bb: B2     (basic block 2 is selected)
//   elt: B2.4  (statement 4 is selected)
//   iter: B2:1 (iteration 1 of the basic block is selected)
//   hover: B3  (hovering over basic block 3)
//
// The selection is updated by mouse events: hover by moving the mouse and
// others by clicking. Elements that are click targets generally have attributes
// (id or data-foo) that define what they should select.
// See watchSelection() in JS for the exact logic.
//
// When the "bb" selection is set to "B2":
//   - sections <section data-selection="bb"> get shown
//   - templates under such sections get re-rendered
//   - elements with class/id "B2" get class "bb-select"
//
//===----------------------------------------------------------------------===//

#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
#include "clang/Analysis/FlowSensitive/DebugSupport.h"
#include "clang/Analysis/FlowSensitive/Logger.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
// Defines assets: HTMLLogger_{html,js,css}
#include "HTMLLogger.inc"

namespace clang::dataflow {
namespace {

// Render a graphviz graph specification to SVG using the `dot` tool.
7906c3fb27SDimitry Andric llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); 8006c3fb27SDimitry Andric 8106c3fb27SDimitry Andric using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; 8206c3fb27SDimitry Andric 8306c3fb27SDimitry Andric // Recursively dumps Values/StorageLocations as JSON 8406c3fb27SDimitry Andric class ModelDumper { 8506c3fb27SDimitry Andric public: 8606c3fb27SDimitry Andric ModelDumper(llvm::json::OStream &JOS, const Environment &Env) 8706c3fb27SDimitry Andric : JOS(JOS), Env(Env) {} 8806c3fb27SDimitry Andric 8906c3fb27SDimitry Andric void dump(Value &V) { 9006c3fb27SDimitry Andric JOS.attribute("value_id", llvm::to_string(&V)); 9106c3fb27SDimitry Andric if (!Visited.insert(&V).second) 9206c3fb27SDimitry Andric return; 9306c3fb27SDimitry Andric 9406c3fb27SDimitry Andric JOS.attribute("kind", debugString(V.getKind())); 9506c3fb27SDimitry Andric 9606c3fb27SDimitry Andric switch (V.getKind()) { 9706c3fb27SDimitry Andric case Value::Kind::Integer: 98*5f757f3fSDimitry Andric case Value::Kind::Record: 9906c3fb27SDimitry Andric case Value::Kind::TopBool: 10006c3fb27SDimitry Andric case Value::Kind::AtomicBool: 10106c3fb27SDimitry Andric case Value::Kind::FormulaBool: 10206c3fb27SDimitry Andric break; 10306c3fb27SDimitry Andric case Value::Kind::Pointer: 10406c3fb27SDimitry Andric JOS.attributeObject( 10506c3fb27SDimitry Andric "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); 10606c3fb27SDimitry Andric break; 10706c3fb27SDimitry Andric } 10806c3fb27SDimitry Andric 10906c3fb27SDimitry Andric for (const auto& Prop : V.properties()) 11006c3fb27SDimitry Andric JOS.attributeObject(("p:" + Prop.first()).str(), 11106c3fb27SDimitry Andric [&] { dump(*Prop.second); }); 11206c3fb27SDimitry Andric 11306c3fb27SDimitry Andric // Running the SAT solver is expensive, but knowing which booleans are 11406c3fb27SDimitry Andric // guaranteed true/false here is valuable and hard to determine by hand. 
11506c3fb27SDimitry Andric if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { 11606c3fb27SDimitry Andric JOS.attribute("formula", llvm::to_string(B->formula())); 117*5f757f3fSDimitry Andric JOS.attribute("truth", Env.proves(B->formula()) ? "true" 118*5f757f3fSDimitry Andric : Env.proves(Env.arena().makeNot(B->formula())) 11906c3fb27SDimitry Andric ? "false" 12006c3fb27SDimitry Andric : "unknown"); 12106c3fb27SDimitry Andric } 12206c3fb27SDimitry Andric } 12306c3fb27SDimitry Andric void dump(const StorageLocation &L) { 12406c3fb27SDimitry Andric JOS.attribute("location", llvm::to_string(&L)); 12506c3fb27SDimitry Andric if (!Visited.insert(&L).second) 12606c3fb27SDimitry Andric return; 12706c3fb27SDimitry Andric 12806c3fb27SDimitry Andric JOS.attribute("type", L.getType().getAsString()); 12906c3fb27SDimitry Andric if (auto *V = Env.getValue(L)) 13006c3fb27SDimitry Andric dump(*V); 131*5f757f3fSDimitry Andric 132*5f757f3fSDimitry Andric if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) { 133*5f757f3fSDimitry Andric for (const auto &Child : RLoc->children()) 134*5f757f3fSDimitry Andric JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] { 135*5f757f3fSDimitry Andric if (Child.second) 136*5f757f3fSDimitry Andric if (Value *Val = Env.getValue(*Child.second)) 137*5f757f3fSDimitry Andric dump(*Val); 138*5f757f3fSDimitry Andric }); 139*5f757f3fSDimitry Andric 140*5f757f3fSDimitry Andric for (const auto &SyntheticField : RLoc->synthetic_fields()) 141*5f757f3fSDimitry Andric JOS.attributeObject(("sf:" + SyntheticField.first()).str(), 142*5f757f3fSDimitry Andric [&] { dump(*SyntheticField.second); }); 143*5f757f3fSDimitry Andric } 14406c3fb27SDimitry Andric } 14506c3fb27SDimitry Andric 14606c3fb27SDimitry Andric llvm::DenseSet<const void*> Visited; 14706c3fb27SDimitry Andric llvm::json::OStream &JOS; 14806c3fb27SDimitry Andric const Environment &Env; 14906c3fb27SDimitry Andric }; 15006c3fb27SDimitry Andric 15106c3fb27SDimitry Andric class HTMLLogger : public Logger 
{ 152*5f757f3fSDimitry Andric struct Iteration { 153*5f757f3fSDimitry Andric const CFGBlock *Block; 154*5f757f3fSDimitry Andric unsigned Iter; 155*5f757f3fSDimitry Andric bool PostVisit; 156*5f757f3fSDimitry Andric bool Converged; 157*5f757f3fSDimitry Andric }; 158*5f757f3fSDimitry Andric 15906c3fb27SDimitry Andric StreamFactory Streams; 16006c3fb27SDimitry Andric std::unique_ptr<llvm::raw_ostream> OS; 16106c3fb27SDimitry Andric std::optional<llvm::json::OStream> JOS; 16206c3fb27SDimitry Andric 16306c3fb27SDimitry Andric const ControlFlowContext *CFG; 16406c3fb27SDimitry Andric // Timeline of iterations of CFG block visitation. 165*5f757f3fSDimitry Andric std::vector<Iteration> Iters; 166*5f757f3fSDimitry Andric // Indexes in `Iters` of the iterations for each block. 167*5f757f3fSDimitry Andric llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters; 16806c3fb27SDimitry Andric // The messages logged in the current context but not yet written. 16906c3fb27SDimitry Andric std::string ContextLogs; 17006c3fb27SDimitry Andric // The number of elements we have visited within the current CFG block. 
17106c3fb27SDimitry Andric unsigned ElementIndex; 17206c3fb27SDimitry Andric 17306c3fb27SDimitry Andric public: 17406c3fb27SDimitry Andric explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} 17506c3fb27SDimitry Andric void beginAnalysis(const ControlFlowContext &CFG, 17606c3fb27SDimitry Andric TypeErasedDataflowAnalysis &A) override { 17706c3fb27SDimitry Andric OS = Streams(); 17806c3fb27SDimitry Andric this->CFG = &CFG; 17906c3fb27SDimitry Andric *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; 18006c3fb27SDimitry Andric 181*5f757f3fSDimitry Andric const auto &D = CFG.getDecl(); 18206c3fb27SDimitry Andric const auto &SM = A.getASTContext().getSourceManager(); 18306c3fb27SDimitry Andric *OS << "<title>"; 184*5f757f3fSDimitry Andric if (const auto *ND = dyn_cast<NamedDecl>(&D)) 18506c3fb27SDimitry Andric *OS << ND->getNameAsString() << " at "; 186*5f757f3fSDimitry Andric *OS << SM.getFilename(D.getLocation()) << ":" 187*5f757f3fSDimitry Andric << SM.getSpellingLineNumber(D.getLocation()); 18806c3fb27SDimitry Andric *OS << "</title>\n"; 18906c3fb27SDimitry Andric 19006c3fb27SDimitry Andric *OS << "<style>" << HTMLLogger_css << "</style>\n"; 19106c3fb27SDimitry Andric *OS << "<script>" << HTMLLogger_js << "</script>\n"; 19206c3fb27SDimitry Andric 19306c3fb27SDimitry Andric writeCode(); 19406c3fb27SDimitry Andric writeCFG(); 19506c3fb27SDimitry Andric 19606c3fb27SDimitry Andric *OS << "<script>var HTMLLoggerData = \n"; 19706c3fb27SDimitry Andric JOS.emplace(*OS, /*Indent=*/2); 19806c3fb27SDimitry Andric JOS->objectBegin(); 19906c3fb27SDimitry Andric JOS->attributeBegin("states"); 20006c3fb27SDimitry Andric JOS->objectBegin(); 20106c3fb27SDimitry Andric } 20206c3fb27SDimitry Andric // Between beginAnalysis() and endAnalysis() we write all the states for 20306c3fb27SDimitry Andric // particular analysis points into the `timeline` array. 
20406c3fb27SDimitry Andric void endAnalysis() override { 20506c3fb27SDimitry Andric JOS->objectEnd(); 20606c3fb27SDimitry Andric JOS->attributeEnd(); 20706c3fb27SDimitry Andric 20806c3fb27SDimitry Andric JOS->attributeArray("timeline", [&] { 20906c3fb27SDimitry Andric for (const auto &E : Iters) { 21006c3fb27SDimitry Andric JOS->object([&] { 211*5f757f3fSDimitry Andric JOS->attribute("block", blockID(E.Block->getBlockID())); 212*5f757f3fSDimitry Andric JOS->attribute("iter", E.Iter); 213*5f757f3fSDimitry Andric JOS->attribute("post_visit", E.PostVisit); 214*5f757f3fSDimitry Andric JOS->attribute("converged", E.Converged); 21506c3fb27SDimitry Andric }); 21606c3fb27SDimitry Andric } 21706c3fb27SDimitry Andric }); 21806c3fb27SDimitry Andric JOS->attributeObject("cfg", [&] { 21906c3fb27SDimitry Andric for (const auto &E : BlockIters) 22006c3fb27SDimitry Andric writeBlock(*E.first, E.second); 22106c3fb27SDimitry Andric }); 22206c3fb27SDimitry Andric 22306c3fb27SDimitry Andric JOS->objectEnd(); 22406c3fb27SDimitry Andric JOS.reset(); 22506c3fb27SDimitry Andric *OS << ";\n</script>\n"; 22606c3fb27SDimitry Andric *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; 22706c3fb27SDimitry Andric } 22806c3fb27SDimitry Andric 229*5f757f3fSDimitry Andric void enterBlock(const CFGBlock &B, bool PostVisit) override { 230*5f757f3fSDimitry Andric llvm::SmallVector<size_t> &BIter = BlockIters[&B]; 231*5f757f3fSDimitry Andric unsigned IterNum = BIter.size() + 1; 232*5f757f3fSDimitry Andric BIter.push_back(Iters.size()); 233*5f757f3fSDimitry Andric Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false}); 23406c3fb27SDimitry Andric ElementIndex = 0; 23506c3fb27SDimitry Andric } 23606c3fb27SDimitry Andric void enterElement(const CFGElement &E) override { 23706c3fb27SDimitry Andric ++ElementIndex; 23806c3fb27SDimitry Andric } 23906c3fb27SDimitry Andric 24006c3fb27SDimitry Andric static std::string blockID(unsigned Block) { 24106c3fb27SDimitry Andric return 
llvm::formatv("B{0}", Block); 24206c3fb27SDimitry Andric } 24306c3fb27SDimitry Andric static std::string eltID(unsigned Block, unsigned Element) { 24406c3fb27SDimitry Andric return llvm::formatv("B{0}.{1}", Block, Element); 24506c3fb27SDimitry Andric } 24606c3fb27SDimitry Andric static std::string iterID(unsigned Block, unsigned Iter) { 24706c3fb27SDimitry Andric return llvm::formatv("B{0}:{1}", Block, Iter); 24806c3fb27SDimitry Andric } 24906c3fb27SDimitry Andric static std::string elementIterID(unsigned Block, unsigned Iter, 25006c3fb27SDimitry Andric unsigned Element) { 25106c3fb27SDimitry Andric return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); 25206c3fb27SDimitry Andric } 25306c3fb27SDimitry Andric 25406c3fb27SDimitry Andric // Write the analysis state associated with a particular analysis point. 25506c3fb27SDimitry Andric // FIXME: this dump is fairly opaque. We should show: 25606c3fb27SDimitry Andric // - values associated with the current Stmt 25706c3fb27SDimitry Andric // - values associated with its children 25806c3fb27SDimitry Andric // - meaningful names for values 25906c3fb27SDimitry Andric // - which boolean values are implied true/false by the flow condition 26006c3fb27SDimitry Andric void recordState(TypeErasedDataflowAnalysisState &State) override { 261*5f757f3fSDimitry Andric unsigned Block = Iters.back().Block->getBlockID(); 262*5f757f3fSDimitry Andric unsigned Iter = Iters.back().Iter; 263*5f757f3fSDimitry Andric bool PostVisit = Iters.back().PostVisit; 26406c3fb27SDimitry Andric JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { 26506c3fb27SDimitry Andric JOS->attribute("block", blockID(Block)); 26606c3fb27SDimitry Andric JOS->attribute("iter", Iter); 267*5f757f3fSDimitry Andric JOS->attribute("post_visit", PostVisit); 26806c3fb27SDimitry Andric JOS->attribute("element", ElementIndex); 26906c3fb27SDimitry Andric 27006c3fb27SDimitry Andric // If this state immediately follows an Expr, show its built-in model. 
27106c3fb27SDimitry Andric if (ElementIndex > 0) { 27206c3fb27SDimitry Andric auto S = 273*5f757f3fSDimitry Andric Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>(); 274*5f757f3fSDimitry Andric if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) { 275*5f757f3fSDimitry Andric if (E->isPRValue()) { 276*5f757f3fSDimitry Andric if (auto *V = State.Env.getValue(*E)) 277*5f757f3fSDimitry Andric JOS->attributeObject( 278*5f757f3fSDimitry Andric "value", [&] { ModelDumper(*JOS, State.Env).dump(*V); }); 279*5f757f3fSDimitry Andric } else { 280*5f757f3fSDimitry Andric if (auto *Loc = State.Env.getStorageLocation(*E)) 28106c3fb27SDimitry Andric JOS->attributeObject( 28206c3fb27SDimitry Andric "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); }); 28306c3fb27SDimitry Andric } 284*5f757f3fSDimitry Andric } 285*5f757f3fSDimitry Andric } 28606c3fb27SDimitry Andric if (!ContextLogs.empty()) { 28706c3fb27SDimitry Andric JOS->attribute("logs", ContextLogs); 28806c3fb27SDimitry Andric ContextLogs.clear(); 28906c3fb27SDimitry Andric } 29006c3fb27SDimitry Andric { 29106c3fb27SDimitry Andric std::string BuiltinLattice; 29206c3fb27SDimitry Andric llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); 29306c3fb27SDimitry Andric State.Env.dump(BuiltinLatticeS); 29406c3fb27SDimitry Andric JOS->attribute("builtinLattice", BuiltinLattice); 29506c3fb27SDimitry Andric } 29606c3fb27SDimitry Andric }); 29706c3fb27SDimitry Andric } 298*5f757f3fSDimitry Andric void blockConverged() override { Iters.back().Converged = true; } 29906c3fb27SDimitry Andric 30006c3fb27SDimitry Andric void logText(llvm::StringRef S) override { 30106c3fb27SDimitry Andric ContextLogs.append(S.begin(), S.end()); 30206c3fb27SDimitry Andric ContextLogs.push_back('\n'); 30306c3fb27SDimitry Andric } 30406c3fb27SDimitry Andric 30506c3fb27SDimitry Andric private: 30606c3fb27SDimitry Andric // Write the CFG block details. 
30706c3fb27SDimitry Andric // Currently this is just the list of elements in execution order. 30806c3fb27SDimitry Andric // FIXME: an AST dump would be a useful view, too. 309*5f757f3fSDimitry Andric void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) { 31006c3fb27SDimitry Andric JOS->attributeObject(blockID(B.getBlockID()), [&] { 311*5f757f3fSDimitry Andric JOS->attributeArray("iters", [&] { 312*5f757f3fSDimitry Andric for (size_t IterIdx : ItersForB) { 313*5f757f3fSDimitry Andric const Iteration &Iter = Iters[IterIdx]; 314*5f757f3fSDimitry Andric JOS->object([&] { 315*5f757f3fSDimitry Andric JOS->attribute("iter", Iter.Iter); 316*5f757f3fSDimitry Andric JOS->attribute("post_visit", Iter.PostVisit); 317*5f757f3fSDimitry Andric JOS->attribute("converged", Iter.Converged); 318*5f757f3fSDimitry Andric }); 319*5f757f3fSDimitry Andric } 320*5f757f3fSDimitry Andric }); 32106c3fb27SDimitry Andric JOS->attributeArray("elements", [&] { 32206c3fb27SDimitry Andric for (const auto &Elt : B.Elements) { 32306c3fb27SDimitry Andric std::string Dump; 32406c3fb27SDimitry Andric llvm::raw_string_ostream DumpS(Dump); 32506c3fb27SDimitry Andric Elt.dumpToStream(DumpS); 32606c3fb27SDimitry Andric JOS->value(Dump); 32706c3fb27SDimitry Andric } 32806c3fb27SDimitry Andric }); 32906c3fb27SDimitry Andric }); 33006c3fb27SDimitry Andric } 33106c3fb27SDimitry Andric 33206c3fb27SDimitry Andric // Write the code of function being examined. 33306c3fb27SDimitry Andric // We want to overlay the code with <span>s that mark which BB particular 33406c3fb27SDimitry Andric // tokens are associated with, and even which BB element (so that clicking 33506c3fb27SDimitry Andric // can select the right element). 33606c3fb27SDimitry Andric void writeCode() { 337*5f757f3fSDimitry Andric const auto &AST = CFG->getDecl().getASTContext(); 33806c3fb27SDimitry Andric bool Invalid = false; 33906c3fb27SDimitry Andric 34006c3fb27SDimitry Andric // Extract the source code from the original file. 
34106c3fb27SDimitry Andric // Pretty-printing from the AST would probably be nicer (no macros or 34206c3fb27SDimitry Andric // indentation to worry about), but we need the boundaries of particular 34306c3fb27SDimitry Andric // AST nodes and the printer doesn't provide this. 34406c3fb27SDimitry Andric auto Range = clang::Lexer::makeFileCharRange( 345*5f757f3fSDimitry Andric CharSourceRange::getTokenRange(CFG->getDecl().getSourceRange()), 34606c3fb27SDimitry Andric AST.getSourceManager(), AST.getLangOpts()); 34706c3fb27SDimitry Andric if (Range.isInvalid()) 34806c3fb27SDimitry Andric return; 34906c3fb27SDimitry Andric llvm::StringRef Code = clang::Lexer::getSourceText( 35006c3fb27SDimitry Andric Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); 35106c3fb27SDimitry Andric if (Invalid) 35206c3fb27SDimitry Andric return; 35306c3fb27SDimitry Andric 35406c3fb27SDimitry Andric // TokenInfo stores the BB and set of elements that a token is part of. 35506c3fb27SDimitry Andric struct TokenInfo { 356*5f757f3fSDimitry Andric enum : unsigned { Missing = static_cast<unsigned>(-1) }; 357*5f757f3fSDimitry Andric 35806c3fb27SDimitry Andric // The basic block this is part of. 35906c3fb27SDimitry Andric // This is the BB of the stmt with the smallest containing range. 36006c3fb27SDimitry Andric unsigned BB = Missing; 36106c3fb27SDimitry Andric unsigned BBPriority = 0; 36206c3fb27SDimitry Andric // The most specific stmt this is part of (smallest range). 36306c3fb27SDimitry Andric unsigned Elt = Missing; 36406c3fb27SDimitry Andric unsigned EltPriority = 0; 36506c3fb27SDimitry Andric // All stmts this is part of. 36606c3fb27SDimitry Andric SmallVector<unsigned> Elts; 36706c3fb27SDimitry Andric 36806c3fb27SDimitry Andric // Mark this token as being part of BB.Elt. 36906c3fb27SDimitry Andric // RangeLen is the character length of the element's range, used to 37006c3fb27SDimitry Andric // distinguish inner vs outer statements. 
37106c3fb27SDimitry Andric // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". 37206c3fb27SDimitry Andric // However "a" has a smaller range, so is more specific. Clicking on the 37306c3fb27SDimitry Andric // token "a" should select the stmt "a". 37406c3fb27SDimitry Andric void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { 37506c3fb27SDimitry Andric // A worse BB (larger range) => ignore. 37606c3fb27SDimitry Andric if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) 37706c3fb27SDimitry Andric return; 37806c3fb27SDimitry Andric if (BB != this->BB) { 37906c3fb27SDimitry Andric this->BB = BB; 38006c3fb27SDimitry Andric Elts.clear(); 38106c3fb27SDimitry Andric BBPriority = RangeLen; 38206c3fb27SDimitry Andric } 38306c3fb27SDimitry Andric BBPriority = std::min(BBPriority, RangeLen); 38406c3fb27SDimitry Andric Elts.push_back(Elt); 38506c3fb27SDimitry Andric if (this->Elt == Missing || EltPriority > RangeLen) 38606c3fb27SDimitry Andric this->Elt = Elt; 38706c3fb27SDimitry Andric } 38806c3fb27SDimitry Andric bool operator==(const TokenInfo &Other) const { 38906c3fb27SDimitry Andric return std::tie(BB, Elt, Elts) == 39006c3fb27SDimitry Andric std::tie(Other.BB, Other.Elt, Other.Elts); 39106c3fb27SDimitry Andric } 39206c3fb27SDimitry Andric // Write the attributes for the <span> on this token. 
39306c3fb27SDimitry Andric void write(llvm::raw_ostream &OS) const { 39406c3fb27SDimitry Andric OS << "class='c"; 39506c3fb27SDimitry Andric if (BB != Missing) 39606c3fb27SDimitry Andric OS << " " << blockID(BB); 39706c3fb27SDimitry Andric for (unsigned Elt : Elts) 39806c3fb27SDimitry Andric OS << " " << eltID(BB, Elt); 39906c3fb27SDimitry Andric OS << "'"; 40006c3fb27SDimitry Andric 40106c3fb27SDimitry Andric if (Elt != Missing) 40206c3fb27SDimitry Andric OS << " data-elt='" << eltID(BB, Elt) << "'"; 40306c3fb27SDimitry Andric if (BB != Missing) 40406c3fb27SDimitry Andric OS << " data-bb='" << blockID(BB) << "'"; 40506c3fb27SDimitry Andric } 40606c3fb27SDimitry Andric }; 40706c3fb27SDimitry Andric 40806c3fb27SDimitry Andric // Construct one TokenInfo per character in a flat array. 40906c3fb27SDimitry Andric // This is inefficient (chars in a token all have the same info) but simple. 41006c3fb27SDimitry Andric std::vector<TokenInfo> State(Code.size()); 41106c3fb27SDimitry Andric for (const auto *Block : CFG->getCFG()) { 41206c3fb27SDimitry Andric unsigned EltIndex = 0; 41306c3fb27SDimitry Andric for (const auto& Elt : *Block) { 41406c3fb27SDimitry Andric ++EltIndex; 41506c3fb27SDimitry Andric if (const auto S = Elt.getAs<CFGStmt>()) { 41606c3fb27SDimitry Andric auto EltRange = clang::Lexer::makeFileCharRange( 41706c3fb27SDimitry Andric CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), 41806c3fb27SDimitry Andric AST.getSourceManager(), AST.getLangOpts()); 41906c3fb27SDimitry Andric if (EltRange.isInvalid()) 42006c3fb27SDimitry Andric continue; 42106c3fb27SDimitry Andric if (EltRange.getBegin() < Range.getBegin() || 42206c3fb27SDimitry Andric EltRange.getEnd() >= Range.getEnd() || 42306c3fb27SDimitry Andric EltRange.getEnd() < Range.getBegin() || 42406c3fb27SDimitry Andric EltRange.getEnd() >= Range.getEnd()) 42506c3fb27SDimitry Andric continue; 42606c3fb27SDimitry Andric unsigned Off = EltRange.getBegin().getRawEncoding() - 42706c3fb27SDimitry Andric 
Range.getBegin().getRawEncoding(); 42806c3fb27SDimitry Andric unsigned Len = EltRange.getEnd().getRawEncoding() - 42906c3fb27SDimitry Andric EltRange.getBegin().getRawEncoding(); 43006c3fb27SDimitry Andric for (unsigned I = 0; I < Len; ++I) 43106c3fb27SDimitry Andric State[Off + I].assign(Block->getBlockID(), EltIndex, Len); 43206c3fb27SDimitry Andric } 43306c3fb27SDimitry Andric } 43406c3fb27SDimitry Andric } 43506c3fb27SDimitry Andric 43606c3fb27SDimitry Andric // Finally, write the code with the correct <span>s. 43706c3fb27SDimitry Andric unsigned Line = 43806c3fb27SDimitry Andric AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); 43906c3fb27SDimitry Andric *OS << "<template data-copy='code'>\n"; 44006c3fb27SDimitry Andric *OS << "<code class='filename'>"; 44106c3fb27SDimitry Andric llvm::printHTMLEscaped( 44206c3fb27SDimitry Andric llvm::sys::path::filename( 44306c3fb27SDimitry Andric AST.getSourceManager().getFilename(Range.getBegin())), 44406c3fb27SDimitry Andric *OS); 44506c3fb27SDimitry Andric *OS << "</code>"; 44606c3fb27SDimitry Andric *OS << "<code class='line' data-line='" << Line++ << "'>"; 44706c3fb27SDimitry Andric for (unsigned I = 0; I < Code.size(); ++I) { 44806c3fb27SDimitry Andric // Don't actually write a <span> around each character, only break spans 44906c3fb27SDimitry Andric // when the TokenInfo changes. 
45006c3fb27SDimitry Andric bool NeedOpen = I == 0 || !(State[I] == State[I-1]); 45106c3fb27SDimitry Andric bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]); 45206c3fb27SDimitry Andric if (NeedOpen) { 45306c3fb27SDimitry Andric *OS << "<span "; 45406c3fb27SDimitry Andric State[I].write(*OS); 45506c3fb27SDimitry Andric *OS << ">"; 45606c3fb27SDimitry Andric } 45706c3fb27SDimitry Andric if (Code[I] == '\n') 45806c3fb27SDimitry Andric *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>"; 45906c3fb27SDimitry Andric else 46006c3fb27SDimitry Andric llvm::printHTMLEscaped(Code.substr(I, 1), *OS); 46106c3fb27SDimitry Andric if (NeedClose) *OS << "</span>"; 46206c3fb27SDimitry Andric } 46306c3fb27SDimitry Andric *OS << "</code>\n"; 46406c3fb27SDimitry Andric *OS << "</template>"; 46506c3fb27SDimitry Andric } 46606c3fb27SDimitry Andric 46706c3fb27SDimitry Andric // Write the CFG diagram, a graph of basic blocks. 46806c3fb27SDimitry Andric // Laying out graphs is hard, so we construct a graphviz description and shell 46906c3fb27SDimitry Andric // out to `dot` to turn it into an SVG. 47006c3fb27SDimitry Andric void writeCFG() { 47106c3fb27SDimitry Andric *OS << "<template data-copy='cfg'>\n"; 47206c3fb27SDimitry Andric if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG()))) 47306c3fb27SDimitry Andric *OS << *SVG; 47406c3fb27SDimitry Andric else 47506c3fb27SDimitry Andric *OS << "Can't draw CFG: " << toString(SVG.takeError()); 47606c3fb27SDimitry Andric *OS << "</template>\n"; 47706c3fb27SDimitry Andric } 47806c3fb27SDimitry Andric 47906c3fb27SDimitry Andric // Produce a graphviz description of a CFG. 48006c3fb27SDimitry Andric static std::string buildCFGDot(const clang::CFG &CFG) { 48106c3fb27SDimitry Andric std::string Graph; 48206c3fb27SDimitry Andric llvm::raw_string_ostream GraphS(Graph); 48306c3fb27SDimitry Andric // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. 
48406c3fb27SDimitry Andric GraphS << R"(digraph { 48506c3fb27SDimitry Andric tooltip=" " 48606c3fb27SDimitry Andric node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] 48706c3fb27SDimitry Andric edge[tooltip = " "] 48806c3fb27SDimitry Andric )"; 48906c3fb27SDimitry Andric for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) 49006c3fb27SDimitry Andric GraphS << " " << blockID(I) << " [id=" << blockID(I) << "]\n"; 49106c3fb27SDimitry Andric for (const auto *Block : CFG) { 49206c3fb27SDimitry Andric for (const auto &Succ : Block->succs()) { 493*5f757f3fSDimitry Andric if (Succ.getReachableBlock()) 49406c3fb27SDimitry Andric GraphS << " " << blockID(Block->getBlockID()) << " -> " 49506c3fb27SDimitry Andric << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; 49606c3fb27SDimitry Andric } 49706c3fb27SDimitry Andric } 49806c3fb27SDimitry Andric GraphS << "}\n"; 49906c3fb27SDimitry Andric return Graph; 50006c3fb27SDimitry Andric } 50106c3fb27SDimitry Andric }; 50206c3fb27SDimitry Andric 50306c3fb27SDimitry Andric // Nothing interesting here, just subprocess/temp-file plumbing. 50406c3fb27SDimitry Andric llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { 50506c3fb27SDimitry Andric std::string DotPath; 50606c3fb27SDimitry Andric if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) 50706c3fb27SDimitry Andric DotPath = FromEnv; 50806c3fb27SDimitry Andric else { 50906c3fb27SDimitry Andric auto FromPath = llvm::sys::findProgramByName("dot"); 51006c3fb27SDimitry Andric if (!FromPath) 51106c3fb27SDimitry Andric return llvm::createStringError(FromPath.getError(), 51206c3fb27SDimitry Andric "'dot' not found on PATH"); 51306c3fb27SDimitry Andric DotPath = FromPath.get(); 51406c3fb27SDimitry Andric } 51506c3fb27SDimitry Andric 51606c3fb27SDimitry Andric // Create input and output files for `dot` subprocess. 51706c3fb27SDimitry Andric // (We create the output file as empty, to reserve the temp filename). 
51806c3fb27SDimitry Andric llvm::SmallString<256> Input, Output; 51906c3fb27SDimitry Andric int InputFD; 52006c3fb27SDimitry Andric if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, 52106c3fb27SDimitry Andric Input)) 52206c3fb27SDimitry Andric return llvm::createStringError(EC, "failed to create `dot` temp input"); 52306c3fb27SDimitry Andric llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; 52406c3fb27SDimitry Andric auto DeleteInput = 52506c3fb27SDimitry Andric llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); 52606c3fb27SDimitry Andric if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) 52706c3fb27SDimitry Andric return llvm::createStringError(EC, "failed to create `dot` temp output"); 52806c3fb27SDimitry Andric auto DeleteOutput = 52906c3fb27SDimitry Andric llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); 53006c3fb27SDimitry Andric 53106c3fb27SDimitry Andric std::vector<std::optional<llvm::StringRef>> Redirects = { 53206c3fb27SDimitry Andric Input, Output, 53306c3fb27SDimitry Andric /*stderr=*/std::nullopt}; 53406c3fb27SDimitry Andric std::string ErrMsg; 53506c3fb27SDimitry Andric int Code = llvm::sys::ExecuteAndWait( 53606c3fb27SDimitry Andric DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, 53706c3fb27SDimitry Andric /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); 53806c3fb27SDimitry Andric if (!ErrMsg.empty()) 53906c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 54006c3fb27SDimitry Andric "'dot' failed: " + ErrMsg); 54106c3fb27SDimitry Andric if (Code != 0) 54206c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 54306c3fb27SDimitry Andric "'dot' failed (" + llvm::Twine(Code) + ")"); 54406c3fb27SDimitry Andric 54506c3fb27SDimitry Andric auto Buf = llvm::MemoryBuffer::getFile(Output); 54606c3fb27SDimitry Andric if (!Buf) 54706c3fb27SDimitry Andric return 
llvm::createStringError(Buf.getError(), "Can't read `dot` output"); 54806c3fb27SDimitry Andric 54906c3fb27SDimitry Andric // Output has <?xml> prefix we don't want. Skip to <svg> tag. 55006c3fb27SDimitry Andric llvm::StringRef Result = Buf.get()->getBuffer(); 55106c3fb27SDimitry Andric auto Pos = Result.find("<svg"); 55206c3fb27SDimitry Andric if (Pos == llvm::StringRef::npos) 55306c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 55406c3fb27SDimitry Andric "Can't find <svg> tag in `dot` output"); 55506c3fb27SDimitry Andric return Result.substr(Pos).str(); 55606c3fb27SDimitry Andric } 55706c3fb27SDimitry Andric 55806c3fb27SDimitry Andric } // namespace 55906c3fb27SDimitry Andric 56006c3fb27SDimitry Andric std::unique_ptr<Logger> 56106c3fb27SDimitry Andric Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { 56206c3fb27SDimitry Andric return std::make_unique<HTMLLogger>(std::move(Streams)); 56306c3fb27SDimitry Andric } 56406c3fb27SDimitry Andric 56506c3fb27SDimitry Andric } // namespace clang::dataflow 566