106c3fb27SDimitry Andric //===-- HTMLLogger.cpp ----------------------------------------------------===// 206c3fb27SDimitry Andric // 306c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 406c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 506c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 606c3fb27SDimitry Andric // 706c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 806c3fb27SDimitry Andric // 906c3fb27SDimitry Andric // This file implements the HTML logger. Given a directory dir/, we write 1006c3fb27SDimitry Andric // dir/0.html for the first analysis, etc. 1106c3fb27SDimitry Andric // These files contain a visualization that allows inspecting the CFG and the 1206c3fb27SDimitry Andric // state of the analysis at each point. 1306c3fb27SDimitry Andric // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded 1406c3fb27SDimitry Andric // so each output file is self-contained. 1506c3fb27SDimitry Andric // 1606c3fb27SDimitry Andric // VIEWS 1706c3fb27SDimitry Andric // 1806c3fb27SDimitry Andric // The timeline and function view are always shown. These allow selecting basic 1906c3fb27SDimitry Andric // blocks, statements within them, and processing iterations (BBs are visited 2006c3fb27SDimitry Andric // multiple times when e.g. loops are involved). 2106c3fb27SDimitry Andric // These are written directly into the HTML body. 2206c3fb27SDimitry Andric // 2306c3fb27SDimitry Andric // There are also listings of particular basic blocks, and dumps of the state 2406c3fb27SDimitry Andric // at particular analysis points (i.e. BB2 iteration 3 statement 2). 2506c3fb27SDimitry Andric // These are only shown when the relevant BB/analysis point is *selected*. 
//
// DATA AND TEMPLATES
//
// The HTML proper is mostly static.
// The analysis data is in a JSON object HTMLLoggerData which is embedded as
// a <script> in the <head>.
// This gets rendered into DOM by a simple template processor which substitutes
// the data into <template> tags embedded in the HTML (see inflate() in JS).
//
// SELECTION
//
// This is the only real interactive mechanism.
//
// At any given time, there are several named selections, e.g.:
//   bb: B2               (basic block 2 is selected)
//   elt: B2.4            (statement 4 is selected)
//   iter: B2:1           (iteration 1 of the basic block is selected)
//   hover: B3            (hovering over basic block 3)
//
// The selection is updated by mouse events: hover by moving the mouse and
// others by clicking. Elements that are click targets generally have attributes
// (id or data-foo) that define what they should select.
// See watchSelection() in JS for the exact logic.
4906c3fb27SDimitry Andric // 5006c3fb27SDimitry Andric // When the "bb" selection is set to "B2": 5106c3fb27SDimitry Andric // - sections <section data-selection="bb"> get shown 5206c3fb27SDimitry Andric // - templates under such sections get re-rendered 5306c3fb27SDimitry Andric // - elements with class/id "B2" get class "bb-select" 5406c3fb27SDimitry Andric // 5506c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 5606c3fb27SDimitry Andric 57*0fca6ea1SDimitry Andric #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 5806c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/DebugSupport.h" 5906c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Logger.h" 6006c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 6106c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Value.h" 6206c3fb27SDimitry Andric #include "clang/Basic/SourceManager.h" 6306c3fb27SDimitry Andric #include "clang/Lex/Lexer.h" 6406c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 6506c3fb27SDimitry Andric #include "llvm/ADT/ScopeExit.h" 6606c3fb27SDimitry Andric #include "llvm/Support/Error.h" 6706c3fb27SDimitry Andric #include "llvm/Support/FormatVariadic.h" 6806c3fb27SDimitry Andric #include "llvm/Support/JSON.h" 6906c3fb27SDimitry Andric #include "llvm/Support/Program.h" 7006c3fb27SDimitry Andric #include "llvm/Support/ScopedPrinter.h" 7106c3fb27SDimitry Andric #include "llvm/Support/raw_ostream.h" 7206c3fb27SDimitry Andric // Defines assets: HTMLLogger_{html_js,css} 7306c3fb27SDimitry Andric #include "HTMLLogger.inc" 7406c3fb27SDimitry Andric 7506c3fb27SDimitry Andric namespace clang::dataflow { 7606c3fb27SDimitry Andric namespace { 7706c3fb27SDimitry Andric 7806c3fb27SDimitry Andric // Render a graphviz graph specification to SVG using the `dot` tool. 
7906c3fb27SDimitry Andric llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); 8006c3fb27SDimitry Andric 8106c3fb27SDimitry Andric using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; 8206c3fb27SDimitry Andric 8306c3fb27SDimitry Andric // Recursively dumps Values/StorageLocations as JSON 8406c3fb27SDimitry Andric class ModelDumper { 8506c3fb27SDimitry Andric public: 8606c3fb27SDimitry Andric ModelDumper(llvm::json::OStream &JOS, const Environment &Env) 8706c3fb27SDimitry Andric : JOS(JOS), Env(Env) {} 8806c3fb27SDimitry Andric 8906c3fb27SDimitry Andric void dump(Value &V) { 9006c3fb27SDimitry Andric JOS.attribute("value_id", llvm::to_string(&V)); 9106c3fb27SDimitry Andric if (!Visited.insert(&V).second) 9206c3fb27SDimitry Andric return; 9306c3fb27SDimitry Andric 9406c3fb27SDimitry Andric JOS.attribute("kind", debugString(V.getKind())); 9506c3fb27SDimitry Andric 9606c3fb27SDimitry Andric switch (V.getKind()) { 9706c3fb27SDimitry Andric case Value::Kind::Integer: 9806c3fb27SDimitry Andric case Value::Kind::TopBool: 9906c3fb27SDimitry Andric case Value::Kind::AtomicBool: 10006c3fb27SDimitry Andric case Value::Kind::FormulaBool: 10106c3fb27SDimitry Andric break; 10206c3fb27SDimitry Andric case Value::Kind::Pointer: 10306c3fb27SDimitry Andric JOS.attributeObject( 10406c3fb27SDimitry Andric "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); 10506c3fb27SDimitry Andric break; 10606c3fb27SDimitry Andric } 10706c3fb27SDimitry Andric 10806c3fb27SDimitry Andric for (const auto& Prop : V.properties()) 10906c3fb27SDimitry Andric JOS.attributeObject(("p:" + Prop.first()).str(), 11006c3fb27SDimitry Andric [&] { dump(*Prop.second); }); 11106c3fb27SDimitry Andric 11206c3fb27SDimitry Andric // Running the SAT solver is expensive, but knowing which booleans are 11306c3fb27SDimitry Andric // guaranteed true/false here is valuable and hard to determine by hand. 
11406c3fb27SDimitry Andric if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { 11506c3fb27SDimitry Andric JOS.attribute("formula", llvm::to_string(B->formula())); 1165f757f3fSDimitry Andric JOS.attribute("truth", Env.proves(B->formula()) ? "true" 1175f757f3fSDimitry Andric : Env.proves(Env.arena().makeNot(B->formula())) 11806c3fb27SDimitry Andric ? "false" 11906c3fb27SDimitry Andric : "unknown"); 12006c3fb27SDimitry Andric } 12106c3fb27SDimitry Andric } 12206c3fb27SDimitry Andric void dump(const StorageLocation &L) { 12306c3fb27SDimitry Andric JOS.attribute("location", llvm::to_string(&L)); 12406c3fb27SDimitry Andric if (!Visited.insert(&L).second) 12506c3fb27SDimitry Andric return; 12606c3fb27SDimitry Andric 12706c3fb27SDimitry Andric JOS.attribute("type", L.getType().getAsString()); 128*0fca6ea1SDimitry Andric if (!L.getType()->isRecordType()) 12906c3fb27SDimitry Andric if (auto *V = Env.getValue(L)) 13006c3fb27SDimitry Andric dump(*V); 1315f757f3fSDimitry Andric 1325f757f3fSDimitry Andric if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) { 1335f757f3fSDimitry Andric for (const auto &Child : RLoc->children()) 1345f757f3fSDimitry Andric JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] { 1355f757f3fSDimitry Andric if (Child.second) 1365f757f3fSDimitry Andric if (Value *Val = Env.getValue(*Child.second)) 1375f757f3fSDimitry Andric dump(*Val); 1385f757f3fSDimitry Andric }); 1395f757f3fSDimitry Andric 1405f757f3fSDimitry Andric for (const auto &SyntheticField : RLoc->synthetic_fields()) 1415f757f3fSDimitry Andric JOS.attributeObject(("sf:" + SyntheticField.first()).str(), 1425f757f3fSDimitry Andric [&] { dump(*SyntheticField.second); }); 1435f757f3fSDimitry Andric } 14406c3fb27SDimitry Andric } 14506c3fb27SDimitry Andric 14606c3fb27SDimitry Andric llvm::DenseSet<const void*> Visited; 14706c3fb27SDimitry Andric llvm::json::OStream &JOS; 14806c3fb27SDimitry Andric const Environment &Env; 14906c3fb27SDimitry Andric }; 15006c3fb27SDimitry Andric 
15106c3fb27SDimitry Andric class HTMLLogger : public Logger { 1525f757f3fSDimitry Andric struct Iteration { 1535f757f3fSDimitry Andric const CFGBlock *Block; 1545f757f3fSDimitry Andric unsigned Iter; 1555f757f3fSDimitry Andric bool PostVisit; 1565f757f3fSDimitry Andric bool Converged; 1575f757f3fSDimitry Andric }; 1585f757f3fSDimitry Andric 15906c3fb27SDimitry Andric StreamFactory Streams; 16006c3fb27SDimitry Andric std::unique_ptr<llvm::raw_ostream> OS; 161*0fca6ea1SDimitry Andric std::string JSON; 162*0fca6ea1SDimitry Andric llvm::raw_string_ostream JStringStream{JSON}; 163*0fca6ea1SDimitry Andric llvm::json::OStream JOS{JStringStream, /*Indent=*/2}; 16406c3fb27SDimitry Andric 165*0fca6ea1SDimitry Andric const AdornedCFG *ACFG; 16606c3fb27SDimitry Andric // Timeline of iterations of CFG block visitation. 1675f757f3fSDimitry Andric std::vector<Iteration> Iters; 1685f757f3fSDimitry Andric // Indexes in `Iters` of the iterations for each block. 1695f757f3fSDimitry Andric llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters; 170*0fca6ea1SDimitry Andric // For a given block ID, did the block converge (on the last iteration)? 171*0fca6ea1SDimitry Andric llvm::BitVector BlockConverged; 17206c3fb27SDimitry Andric // The messages logged in the current context but not yet written. 17306c3fb27SDimitry Andric std::string ContextLogs; 17406c3fb27SDimitry Andric // The number of elements we have visited within the current CFG block. 
17506c3fb27SDimitry Andric unsigned ElementIndex; 17606c3fb27SDimitry Andric 17706c3fb27SDimitry Andric public: 17806c3fb27SDimitry Andric explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} 179*0fca6ea1SDimitry Andric void beginAnalysis(const AdornedCFG &ACFG, 18006c3fb27SDimitry Andric TypeErasedDataflowAnalysis &A) override { 18106c3fb27SDimitry Andric OS = Streams(); 182*0fca6ea1SDimitry Andric this->ACFG = &ACFG; 18306c3fb27SDimitry Andric *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; 18406c3fb27SDimitry Andric 185*0fca6ea1SDimitry Andric BlockConverged.resize(ACFG.getCFG().getNumBlockIDs()); 186*0fca6ea1SDimitry Andric 187*0fca6ea1SDimitry Andric const auto &D = ACFG.getDecl(); 18806c3fb27SDimitry Andric const auto &SM = A.getASTContext().getSourceManager(); 18906c3fb27SDimitry Andric *OS << "<title>"; 1905f757f3fSDimitry Andric if (const auto *ND = dyn_cast<NamedDecl>(&D)) 19106c3fb27SDimitry Andric *OS << ND->getNameAsString() << " at "; 1925f757f3fSDimitry Andric *OS << SM.getFilename(D.getLocation()) << ":" 1935f757f3fSDimitry Andric << SM.getSpellingLineNumber(D.getLocation()); 19406c3fb27SDimitry Andric *OS << "</title>\n"; 19506c3fb27SDimitry Andric 19606c3fb27SDimitry Andric *OS << "<style>" << HTMLLogger_css << "</style>\n"; 19706c3fb27SDimitry Andric *OS << "<script>" << HTMLLogger_js << "</script>\n"; 19806c3fb27SDimitry Andric 19906c3fb27SDimitry Andric writeCode(); 200*0fca6ea1SDimitry Andric JOS.objectBegin(); 201*0fca6ea1SDimitry Andric JOS.attributeBegin("states"); 202*0fca6ea1SDimitry Andric JOS.objectBegin(); 20306c3fb27SDimitry Andric } 20406c3fb27SDimitry Andric // Between beginAnalysis() and endAnalysis() we write all the states for 20506c3fb27SDimitry Andric // particular analysis points into the `timeline` array. 
20606c3fb27SDimitry Andric void endAnalysis() override { 207*0fca6ea1SDimitry Andric JOS.objectEnd(); 208*0fca6ea1SDimitry Andric JOS.attributeEnd(); 20906c3fb27SDimitry Andric 210*0fca6ea1SDimitry Andric JOS.attributeArray("timeline", [&] { 21106c3fb27SDimitry Andric for (const auto &E : Iters) { 212*0fca6ea1SDimitry Andric JOS.object([&] { 213*0fca6ea1SDimitry Andric JOS.attribute("block", blockID(E.Block->getBlockID())); 214*0fca6ea1SDimitry Andric JOS.attribute("iter", E.Iter); 215*0fca6ea1SDimitry Andric JOS.attribute("post_visit", E.PostVisit); 216*0fca6ea1SDimitry Andric JOS.attribute("converged", E.Converged); 21706c3fb27SDimitry Andric }); 21806c3fb27SDimitry Andric } 21906c3fb27SDimitry Andric }); 220*0fca6ea1SDimitry Andric JOS.attributeObject("cfg", [&] { 22106c3fb27SDimitry Andric for (const auto &E : BlockIters) 22206c3fb27SDimitry Andric writeBlock(*E.first, E.second); 22306c3fb27SDimitry Andric }); 22406c3fb27SDimitry Andric 225*0fca6ea1SDimitry Andric JOS.objectEnd(); 226*0fca6ea1SDimitry Andric 227*0fca6ea1SDimitry Andric writeCFG(); 228*0fca6ea1SDimitry Andric 229*0fca6ea1SDimitry Andric *OS << "<script>var HTMLLoggerData = \n"; 230*0fca6ea1SDimitry Andric *OS << JSON; 23106c3fb27SDimitry Andric *OS << ";\n</script>\n"; 23206c3fb27SDimitry Andric *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; 23306c3fb27SDimitry Andric } 23406c3fb27SDimitry Andric 2355f757f3fSDimitry Andric void enterBlock(const CFGBlock &B, bool PostVisit) override { 2365f757f3fSDimitry Andric llvm::SmallVector<size_t> &BIter = BlockIters[&B]; 2375f757f3fSDimitry Andric unsigned IterNum = BIter.size() + 1; 2385f757f3fSDimitry Andric BIter.push_back(Iters.size()); 2395f757f3fSDimitry Andric Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false}); 240*0fca6ea1SDimitry Andric if (!PostVisit) 241*0fca6ea1SDimitry Andric BlockConverged[B.getBlockID()] = false; 24206c3fb27SDimitry Andric ElementIndex = 0; 24306c3fb27SDimitry Andric } 24406c3fb27SDimitry 
Andric void enterElement(const CFGElement &E) override { 24506c3fb27SDimitry Andric ++ElementIndex; 24606c3fb27SDimitry Andric } 24706c3fb27SDimitry Andric 24806c3fb27SDimitry Andric static std::string blockID(unsigned Block) { 24906c3fb27SDimitry Andric return llvm::formatv("B{0}", Block); 25006c3fb27SDimitry Andric } 25106c3fb27SDimitry Andric static std::string eltID(unsigned Block, unsigned Element) { 25206c3fb27SDimitry Andric return llvm::formatv("B{0}.{1}", Block, Element); 25306c3fb27SDimitry Andric } 25406c3fb27SDimitry Andric static std::string iterID(unsigned Block, unsigned Iter) { 25506c3fb27SDimitry Andric return llvm::formatv("B{0}:{1}", Block, Iter); 25606c3fb27SDimitry Andric } 25706c3fb27SDimitry Andric static std::string elementIterID(unsigned Block, unsigned Iter, 25806c3fb27SDimitry Andric unsigned Element) { 25906c3fb27SDimitry Andric return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); 26006c3fb27SDimitry Andric } 26106c3fb27SDimitry Andric 26206c3fb27SDimitry Andric // Write the analysis state associated with a particular analysis point. 26306c3fb27SDimitry Andric // FIXME: this dump is fairly opaque. 
We should show: 26406c3fb27SDimitry Andric // - values associated with the current Stmt 26506c3fb27SDimitry Andric // - values associated with its children 26606c3fb27SDimitry Andric // - meaningful names for values 26706c3fb27SDimitry Andric // - which boolean values are implied true/false by the flow condition 26806c3fb27SDimitry Andric void recordState(TypeErasedDataflowAnalysisState &State) override { 2695f757f3fSDimitry Andric unsigned Block = Iters.back().Block->getBlockID(); 2705f757f3fSDimitry Andric unsigned Iter = Iters.back().Iter; 2715f757f3fSDimitry Andric bool PostVisit = Iters.back().PostVisit; 272*0fca6ea1SDimitry Andric JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { 273*0fca6ea1SDimitry Andric JOS.attribute("block", blockID(Block)); 274*0fca6ea1SDimitry Andric JOS.attribute("iter", Iter); 275*0fca6ea1SDimitry Andric JOS.attribute("post_visit", PostVisit); 276*0fca6ea1SDimitry Andric JOS.attribute("element", ElementIndex); 27706c3fb27SDimitry Andric 27806c3fb27SDimitry Andric // If this state immediately follows an Expr, show its built-in model. 27906c3fb27SDimitry Andric if (ElementIndex > 0) { 28006c3fb27SDimitry Andric auto S = 2815f757f3fSDimitry Andric Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>(); 2825f757f3fSDimitry Andric if (const Expr *E = S ? 
llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) { 2835f757f3fSDimitry Andric if (E->isPRValue()) { 284*0fca6ea1SDimitry Andric if (!E->getType()->isRecordType()) 2855f757f3fSDimitry Andric if (auto *V = State.Env.getValue(*E)) 286*0fca6ea1SDimitry Andric JOS.attributeObject( 287*0fca6ea1SDimitry Andric "value", [&] { ModelDumper(JOS, State.Env).dump(*V); }); 2885f757f3fSDimitry Andric } else { 2895f757f3fSDimitry Andric if (auto *Loc = State.Env.getStorageLocation(*E)) 290*0fca6ea1SDimitry Andric JOS.attributeObject( 291*0fca6ea1SDimitry Andric "value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); }); 29206c3fb27SDimitry Andric } 2935f757f3fSDimitry Andric } 2945f757f3fSDimitry Andric } 29506c3fb27SDimitry Andric if (!ContextLogs.empty()) { 296*0fca6ea1SDimitry Andric JOS.attribute("logs", ContextLogs); 29706c3fb27SDimitry Andric ContextLogs.clear(); 29806c3fb27SDimitry Andric } 29906c3fb27SDimitry Andric { 30006c3fb27SDimitry Andric std::string BuiltinLattice; 30106c3fb27SDimitry Andric llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); 30206c3fb27SDimitry Andric State.Env.dump(BuiltinLatticeS); 303*0fca6ea1SDimitry Andric JOS.attribute("builtinLattice", BuiltinLattice); 30406c3fb27SDimitry Andric } 30506c3fb27SDimitry Andric }); 30606c3fb27SDimitry Andric } 307*0fca6ea1SDimitry Andric void blockConverged() override { 308*0fca6ea1SDimitry Andric Iters.back().Converged = true; 309*0fca6ea1SDimitry Andric BlockConverged[Iters.back().Block->getBlockID()] = true; 310*0fca6ea1SDimitry Andric } 31106c3fb27SDimitry Andric 31206c3fb27SDimitry Andric void logText(llvm::StringRef S) override { 31306c3fb27SDimitry Andric ContextLogs.append(S.begin(), S.end()); 31406c3fb27SDimitry Andric ContextLogs.push_back('\n'); 31506c3fb27SDimitry Andric } 31606c3fb27SDimitry Andric 31706c3fb27SDimitry Andric private: 31806c3fb27SDimitry Andric // Write the CFG block details. 31906c3fb27SDimitry Andric // Currently this is just the list of elements in execution order. 
32006c3fb27SDimitry Andric // FIXME: an AST dump would be a useful view, too. 3215f757f3fSDimitry Andric void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) { 322*0fca6ea1SDimitry Andric JOS.attributeObject(blockID(B.getBlockID()), [&] { 323*0fca6ea1SDimitry Andric JOS.attributeArray("iters", [&] { 3245f757f3fSDimitry Andric for (size_t IterIdx : ItersForB) { 3255f757f3fSDimitry Andric const Iteration &Iter = Iters[IterIdx]; 326*0fca6ea1SDimitry Andric JOS.object([&] { 327*0fca6ea1SDimitry Andric JOS.attribute("iter", Iter.Iter); 328*0fca6ea1SDimitry Andric JOS.attribute("post_visit", Iter.PostVisit); 329*0fca6ea1SDimitry Andric JOS.attribute("converged", Iter.Converged); 3305f757f3fSDimitry Andric }); 3315f757f3fSDimitry Andric } 3325f757f3fSDimitry Andric }); 333*0fca6ea1SDimitry Andric JOS.attributeArray("elements", [&] { 33406c3fb27SDimitry Andric for (const auto &Elt : B.Elements) { 33506c3fb27SDimitry Andric std::string Dump; 33606c3fb27SDimitry Andric llvm::raw_string_ostream DumpS(Dump); 33706c3fb27SDimitry Andric Elt.dumpToStream(DumpS); 338*0fca6ea1SDimitry Andric JOS.value(Dump); 33906c3fb27SDimitry Andric } 34006c3fb27SDimitry Andric }); 34106c3fb27SDimitry Andric }); 34206c3fb27SDimitry Andric } 34306c3fb27SDimitry Andric 34406c3fb27SDimitry Andric // Write the code of function being examined. 34506c3fb27SDimitry Andric // We want to overlay the code with <span>s that mark which BB particular 34606c3fb27SDimitry Andric // tokens are associated with, and even which BB element (so that clicking 34706c3fb27SDimitry Andric // can select the right element). 34806c3fb27SDimitry Andric void writeCode() { 349*0fca6ea1SDimitry Andric const auto &AST = ACFG->getDecl().getASTContext(); 35006c3fb27SDimitry Andric bool Invalid = false; 35106c3fb27SDimitry Andric 35206c3fb27SDimitry Andric // Extract the source code from the original file. 
35306c3fb27SDimitry Andric // Pretty-printing from the AST would probably be nicer (no macros or 35406c3fb27SDimitry Andric // indentation to worry about), but we need the boundaries of particular 35506c3fb27SDimitry Andric // AST nodes and the printer doesn't provide this. 35606c3fb27SDimitry Andric auto Range = clang::Lexer::makeFileCharRange( 357*0fca6ea1SDimitry Andric CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()), 35806c3fb27SDimitry Andric AST.getSourceManager(), AST.getLangOpts()); 35906c3fb27SDimitry Andric if (Range.isInvalid()) 36006c3fb27SDimitry Andric return; 36106c3fb27SDimitry Andric llvm::StringRef Code = clang::Lexer::getSourceText( 36206c3fb27SDimitry Andric Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); 36306c3fb27SDimitry Andric if (Invalid) 36406c3fb27SDimitry Andric return; 36506c3fb27SDimitry Andric 36606c3fb27SDimitry Andric // TokenInfo stores the BB and set of elements that a token is part of. 36706c3fb27SDimitry Andric struct TokenInfo { 3685f757f3fSDimitry Andric enum : unsigned { Missing = static_cast<unsigned>(-1) }; 3695f757f3fSDimitry Andric 37006c3fb27SDimitry Andric // The basic block this is part of. 37106c3fb27SDimitry Andric // This is the BB of the stmt with the smallest containing range. 37206c3fb27SDimitry Andric unsigned BB = Missing; 37306c3fb27SDimitry Andric unsigned BBPriority = 0; 37406c3fb27SDimitry Andric // The most specific stmt this is part of (smallest range). 37506c3fb27SDimitry Andric unsigned Elt = Missing; 37606c3fb27SDimitry Andric unsigned EltPriority = 0; 37706c3fb27SDimitry Andric // All stmts this is part of. 37806c3fb27SDimitry Andric SmallVector<unsigned> Elts; 37906c3fb27SDimitry Andric 38006c3fb27SDimitry Andric // Mark this token as being part of BB.Elt. 38106c3fb27SDimitry Andric // RangeLen is the character length of the element's range, used to 38206c3fb27SDimitry Andric // distinguish inner vs outer statements. 
38306c3fb27SDimitry Andric // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". 38406c3fb27SDimitry Andric // However "a" has a smaller range, so is more specific. Clicking on the 38506c3fb27SDimitry Andric // token "a" should select the stmt "a". 38606c3fb27SDimitry Andric void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { 38706c3fb27SDimitry Andric // A worse BB (larger range) => ignore. 38806c3fb27SDimitry Andric if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) 38906c3fb27SDimitry Andric return; 39006c3fb27SDimitry Andric if (BB != this->BB) { 39106c3fb27SDimitry Andric this->BB = BB; 39206c3fb27SDimitry Andric Elts.clear(); 39306c3fb27SDimitry Andric BBPriority = RangeLen; 39406c3fb27SDimitry Andric } 39506c3fb27SDimitry Andric BBPriority = std::min(BBPriority, RangeLen); 39606c3fb27SDimitry Andric Elts.push_back(Elt); 39706c3fb27SDimitry Andric if (this->Elt == Missing || EltPriority > RangeLen) 39806c3fb27SDimitry Andric this->Elt = Elt; 39906c3fb27SDimitry Andric } 40006c3fb27SDimitry Andric bool operator==(const TokenInfo &Other) const { 40106c3fb27SDimitry Andric return std::tie(BB, Elt, Elts) == 40206c3fb27SDimitry Andric std::tie(Other.BB, Other.Elt, Other.Elts); 40306c3fb27SDimitry Andric } 40406c3fb27SDimitry Andric // Write the attributes for the <span> on this token. 
40506c3fb27SDimitry Andric void write(llvm::raw_ostream &OS) const { 40606c3fb27SDimitry Andric OS << "class='c"; 40706c3fb27SDimitry Andric if (BB != Missing) 40806c3fb27SDimitry Andric OS << " " << blockID(BB); 40906c3fb27SDimitry Andric for (unsigned Elt : Elts) 41006c3fb27SDimitry Andric OS << " " << eltID(BB, Elt); 41106c3fb27SDimitry Andric OS << "'"; 41206c3fb27SDimitry Andric 41306c3fb27SDimitry Andric if (Elt != Missing) 41406c3fb27SDimitry Andric OS << " data-elt='" << eltID(BB, Elt) << "'"; 41506c3fb27SDimitry Andric if (BB != Missing) 41606c3fb27SDimitry Andric OS << " data-bb='" << blockID(BB) << "'"; 41706c3fb27SDimitry Andric } 41806c3fb27SDimitry Andric }; 41906c3fb27SDimitry Andric 42006c3fb27SDimitry Andric // Construct one TokenInfo per character in a flat array. 42106c3fb27SDimitry Andric // This is inefficient (chars in a token all have the same info) but simple. 42206c3fb27SDimitry Andric std::vector<TokenInfo> State(Code.size()); 423*0fca6ea1SDimitry Andric for (const auto *Block : ACFG->getCFG()) { 42406c3fb27SDimitry Andric unsigned EltIndex = 0; 42506c3fb27SDimitry Andric for (const auto& Elt : *Block) { 42606c3fb27SDimitry Andric ++EltIndex; 42706c3fb27SDimitry Andric if (const auto S = Elt.getAs<CFGStmt>()) { 42806c3fb27SDimitry Andric auto EltRange = clang::Lexer::makeFileCharRange( 42906c3fb27SDimitry Andric CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), 43006c3fb27SDimitry Andric AST.getSourceManager(), AST.getLangOpts()); 43106c3fb27SDimitry Andric if (EltRange.isInvalid()) 43206c3fb27SDimitry Andric continue; 43306c3fb27SDimitry Andric if (EltRange.getBegin() < Range.getBegin() || 43406c3fb27SDimitry Andric EltRange.getEnd() >= Range.getEnd() || 43506c3fb27SDimitry Andric EltRange.getEnd() < Range.getBegin() || 43606c3fb27SDimitry Andric EltRange.getEnd() >= Range.getEnd()) 43706c3fb27SDimitry Andric continue; 43806c3fb27SDimitry Andric unsigned Off = EltRange.getBegin().getRawEncoding() - 43906c3fb27SDimitry Andric 
Range.getBegin().getRawEncoding(); 44006c3fb27SDimitry Andric unsigned Len = EltRange.getEnd().getRawEncoding() - 44106c3fb27SDimitry Andric EltRange.getBegin().getRawEncoding(); 44206c3fb27SDimitry Andric for (unsigned I = 0; I < Len; ++I) 44306c3fb27SDimitry Andric State[Off + I].assign(Block->getBlockID(), EltIndex, Len); 44406c3fb27SDimitry Andric } 44506c3fb27SDimitry Andric } 44606c3fb27SDimitry Andric } 44706c3fb27SDimitry Andric 44806c3fb27SDimitry Andric // Finally, write the code with the correct <span>s. 44906c3fb27SDimitry Andric unsigned Line = 45006c3fb27SDimitry Andric AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); 45106c3fb27SDimitry Andric *OS << "<template data-copy='code'>\n"; 45206c3fb27SDimitry Andric *OS << "<code class='filename'>"; 45306c3fb27SDimitry Andric llvm::printHTMLEscaped( 45406c3fb27SDimitry Andric llvm::sys::path::filename( 45506c3fb27SDimitry Andric AST.getSourceManager().getFilename(Range.getBegin())), 45606c3fb27SDimitry Andric *OS); 45706c3fb27SDimitry Andric *OS << "</code>"; 45806c3fb27SDimitry Andric *OS << "<code class='line' data-line='" << Line++ << "'>"; 45906c3fb27SDimitry Andric for (unsigned I = 0; I < Code.size(); ++I) { 46006c3fb27SDimitry Andric // Don't actually write a <span> around each character, only break spans 46106c3fb27SDimitry Andric // when the TokenInfo changes. 
46206c3fb27SDimitry Andric bool NeedOpen = I == 0 || !(State[I] == State[I-1]); 46306c3fb27SDimitry Andric bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]); 46406c3fb27SDimitry Andric if (NeedOpen) { 46506c3fb27SDimitry Andric *OS << "<span "; 46606c3fb27SDimitry Andric State[I].write(*OS); 46706c3fb27SDimitry Andric *OS << ">"; 46806c3fb27SDimitry Andric } 46906c3fb27SDimitry Andric if (Code[I] == '\n') 47006c3fb27SDimitry Andric *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>"; 47106c3fb27SDimitry Andric else 47206c3fb27SDimitry Andric llvm::printHTMLEscaped(Code.substr(I, 1), *OS); 47306c3fb27SDimitry Andric if (NeedClose) *OS << "</span>"; 47406c3fb27SDimitry Andric } 47506c3fb27SDimitry Andric *OS << "</code>\n"; 47606c3fb27SDimitry Andric *OS << "</template>"; 47706c3fb27SDimitry Andric } 47806c3fb27SDimitry Andric 47906c3fb27SDimitry Andric // Write the CFG diagram, a graph of basic blocks. 48006c3fb27SDimitry Andric // Laying out graphs is hard, so we construct a graphviz description and shell 48106c3fb27SDimitry Andric // out to `dot` to turn it into an SVG. 48206c3fb27SDimitry Andric void writeCFG() { 48306c3fb27SDimitry Andric *OS << "<template data-copy='cfg'>\n"; 484*0fca6ea1SDimitry Andric if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG()))) 48506c3fb27SDimitry Andric *OS << *SVG; 48606c3fb27SDimitry Andric else 48706c3fb27SDimitry Andric *OS << "Can't draw CFG: " << toString(SVG.takeError()); 48806c3fb27SDimitry Andric *OS << "</template>\n"; 48906c3fb27SDimitry Andric } 49006c3fb27SDimitry Andric 49106c3fb27SDimitry Andric // Produce a graphviz description of a CFG. 492*0fca6ea1SDimitry Andric std::string buildCFGDot(const clang::CFG &CFG) { 49306c3fb27SDimitry Andric std::string Graph; 49406c3fb27SDimitry Andric llvm::raw_string_ostream GraphS(Graph); 49506c3fb27SDimitry Andric // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. 
49606c3fb27SDimitry Andric GraphS << R"(digraph { 49706c3fb27SDimitry Andric tooltip=" " 49806c3fb27SDimitry Andric node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] 49906c3fb27SDimitry Andric edge[tooltip = " "] 50006c3fb27SDimitry Andric )"; 501*0fca6ea1SDimitry Andric for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) { 502*0fca6ea1SDimitry Andric std::string Name = blockID(I); 503*0fca6ea1SDimitry Andric // Rightwards arrow, vertical line 504*0fca6ea1SDimitry Andric const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c"; 505*0fca6ea1SDimitry Andric if (BlockConverged[I]) 506*0fca6ea1SDimitry Andric Name += ConvergenceMarker; 507*0fca6ea1SDimitry Andric GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\"" 508*0fca6ea1SDimitry Andric << Name << "\"]\n"; 509*0fca6ea1SDimitry Andric } 51006c3fb27SDimitry Andric for (const auto *Block : CFG) { 51106c3fb27SDimitry Andric for (const auto &Succ : Block->succs()) { 5125f757f3fSDimitry Andric if (Succ.getReachableBlock()) 51306c3fb27SDimitry Andric GraphS << " " << blockID(Block->getBlockID()) << " -> " 51406c3fb27SDimitry Andric << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; 51506c3fb27SDimitry Andric } 51606c3fb27SDimitry Andric } 51706c3fb27SDimitry Andric GraphS << "}\n"; 51806c3fb27SDimitry Andric return Graph; 51906c3fb27SDimitry Andric } 52006c3fb27SDimitry Andric }; 52106c3fb27SDimitry Andric 52206c3fb27SDimitry Andric // Nothing interesting here, just subprocess/temp-file plumbing. 
52306c3fb27SDimitry Andric llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { 52406c3fb27SDimitry Andric std::string DotPath; 52506c3fb27SDimitry Andric if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) 52606c3fb27SDimitry Andric DotPath = FromEnv; 52706c3fb27SDimitry Andric else { 52806c3fb27SDimitry Andric auto FromPath = llvm::sys::findProgramByName("dot"); 52906c3fb27SDimitry Andric if (!FromPath) 53006c3fb27SDimitry Andric return llvm::createStringError(FromPath.getError(), 53106c3fb27SDimitry Andric "'dot' not found on PATH"); 53206c3fb27SDimitry Andric DotPath = FromPath.get(); 53306c3fb27SDimitry Andric } 53406c3fb27SDimitry Andric 53506c3fb27SDimitry Andric // Create input and output files for `dot` subprocess. 53606c3fb27SDimitry Andric // (We create the output file as empty, to reserve the temp filename). 53706c3fb27SDimitry Andric llvm::SmallString<256> Input, Output; 53806c3fb27SDimitry Andric int InputFD; 53906c3fb27SDimitry Andric if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, 54006c3fb27SDimitry Andric Input)) 54106c3fb27SDimitry Andric return llvm::createStringError(EC, "failed to create `dot` temp input"); 54206c3fb27SDimitry Andric llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; 54306c3fb27SDimitry Andric auto DeleteInput = 54406c3fb27SDimitry Andric llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); 54506c3fb27SDimitry Andric if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) 54606c3fb27SDimitry Andric return llvm::createStringError(EC, "failed to create `dot` temp output"); 54706c3fb27SDimitry Andric auto DeleteOutput = 54806c3fb27SDimitry Andric llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); 54906c3fb27SDimitry Andric 55006c3fb27SDimitry Andric std::vector<std::optional<llvm::StringRef>> Redirects = { 55106c3fb27SDimitry Andric Input, Output, 55206c3fb27SDimitry Andric /*stderr=*/std::nullopt}; 55306c3fb27SDimitry Andric 
std::string ErrMsg; 55406c3fb27SDimitry Andric int Code = llvm::sys::ExecuteAndWait( 55506c3fb27SDimitry Andric DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, 55606c3fb27SDimitry Andric /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); 55706c3fb27SDimitry Andric if (!ErrMsg.empty()) 55806c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 55906c3fb27SDimitry Andric "'dot' failed: " + ErrMsg); 56006c3fb27SDimitry Andric if (Code != 0) 56106c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 56206c3fb27SDimitry Andric "'dot' failed (" + llvm::Twine(Code) + ")"); 56306c3fb27SDimitry Andric 56406c3fb27SDimitry Andric auto Buf = llvm::MemoryBuffer::getFile(Output); 56506c3fb27SDimitry Andric if (!Buf) 56606c3fb27SDimitry Andric return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); 56706c3fb27SDimitry Andric 56806c3fb27SDimitry Andric // Output has <?xml> prefix we don't want. Skip to <svg> tag. 56906c3fb27SDimitry Andric llvm::StringRef Result = Buf.get()->getBuffer(); 57006c3fb27SDimitry Andric auto Pos = Result.find("<svg"); 57106c3fb27SDimitry Andric if (Pos == llvm::StringRef::npos) 57206c3fb27SDimitry Andric return llvm::createStringError(llvm::inconvertibleErrorCode(), 57306c3fb27SDimitry Andric "Can't find <svg> tag in `dot` output"); 57406c3fb27SDimitry Andric return Result.substr(Pos).str(); 57506c3fb27SDimitry Andric } 57606c3fb27SDimitry Andric 57706c3fb27SDimitry Andric } // namespace 57806c3fb27SDimitry Andric 57906c3fb27SDimitry Andric std::unique_ptr<Logger> 58006c3fb27SDimitry Andric Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { 58106c3fb27SDimitry Andric return std::make_unique<HTMLLogger>(std::move(Streams)); 58206c3fb27SDimitry Andric } 58306c3fb27SDimitry Andric 58406c3fb27SDimitry Andric } // namespace clang::dataflow 585