1a443b3d1SSam McCall //===-- HTMLLogger.cpp ----------------------------------------------------===// 2a443b3d1SSam McCall // 3a443b3d1SSam McCall // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4a443b3d1SSam McCall // See https://llvm.org/LICENSE.txt for license information. 5a443b3d1SSam McCall // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6a443b3d1SSam McCall // 7a443b3d1SSam McCall //===----------------------------------------------------------------------===// 8a443b3d1SSam McCall // 9a443b3d1SSam McCall // This file implements the HTML logger. Given a directory dir/, we write 10a443b3d1SSam McCall // dir/0.html for the first analysis, etc. 11a443b3d1SSam McCall // These files contain a visualization that allows inspecting the CFG and the 12a443b3d1SSam McCall // state of the analysis at each point. 13a443b3d1SSam McCall // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded 14a443b3d1SSam McCall // so each output file is self-contained. 15a443b3d1SSam McCall // 16a443b3d1SSam McCall // VIEWS 17a443b3d1SSam McCall // 18a443b3d1SSam McCall // The timeline and function view are always shown. These allow selecting basic 19a443b3d1SSam McCall // blocks, statements within them, and processing iterations (BBs are visited 20a443b3d1SSam McCall // multiple times when e.g. loops are involved). 21a443b3d1SSam McCall // These are written directly into the HTML body. 22a443b3d1SSam McCall // 23a443b3d1SSam McCall // There are also listings of particular basic blocks, and dumps of the state 24a443b3d1SSam McCall // at particular analysis points (i.e. BB2 iteration 3 statement 2). 25a443b3d1SSam McCall // These are only shown when the relevant BB/analysis point is *selected*. 26a443b3d1SSam McCall // 27a443b3d1SSam McCall // DATA AND TEMPLATES 28a443b3d1SSam McCall // 29a443b3d1SSam McCall // The HTML proper is mostly static. 30a443b3d1SSam McCall // The analysis data is in a JSON object HTMLLoggerData which is embedded as 31a443b3d1SSam McCall // a <script> in the <head>. 32a443b3d1SSam McCall // This gets rendered into DOM by a simple template processor which substitutes 33a443b3d1SSam McCall // the data into <template> tags embedded in the HTML. (see inflate() in JS). 34a443b3d1SSam McCall // 35a443b3d1SSam McCall // SELECTION 36a443b3d1SSam McCall // 37a443b3d1SSam McCall // This is the only real interactive mechanism. 38a443b3d1SSam McCall // 39a443b3d1SSam McCall // At any given time, there are several named selections, e.g.: 40a443b3d1SSam McCall // bb: B2 (basic block 0 is selected) 41a443b3d1SSam McCall // elt: B2.4 (statement 4 is selected) 42a443b3d1SSam McCall // iter: B2:1 (iteration 1 of the basic block is selected) 43a443b3d1SSam McCall // hover: B3 (hovering over basic block 3) 44a443b3d1SSam McCall // 45a443b3d1SSam McCall // The selection is updated by mouse events: hover by moving the mouse and 46a443b3d1SSam McCall // others by clicking. Elements that are click targets generally have attributes 47a443b3d1SSam McCall // (id or data-foo) that define what they should select. 48a443b3d1SSam McCall // See watchSelection() in JS for the exact logic. 49a443b3d1SSam McCall // 50a443b3d1SSam McCall // When the "bb" selection is set to "B2": 51a443b3d1SSam McCall // - sections <section data-selection="bb"> get shown 52a443b3d1SSam McCall // - templates under such sections get re-rendered 53a443b3d1SSam McCall // - elements with class/id "B2" get class "bb-select" 54a443b3d1SSam McCall // 55a443b3d1SSam McCall //===----------------------------------------------------------------------===// 56a443b3d1SSam McCall 5759ff3adcSmartinboehme #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 58a443b3d1SSam McCall #include "clang/Analysis/FlowSensitive/DebugSupport.h" 59a443b3d1SSam McCall #include "clang/Analysis/FlowSensitive/Logger.h" 60a443b3d1SSam McCall #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 61a443b3d1SSam McCall #include "clang/Analysis/FlowSensitive/Value.h" 62a443b3d1SSam McCall #include "clang/Basic/SourceManager.h" 63a443b3d1SSam McCall #include "clang/Lex/Lexer.h" 64a443b3d1SSam McCall #include "llvm/ADT/DenseMap.h" 65a443b3d1SSam McCall #include "llvm/ADT/ScopeExit.h" 66a443b3d1SSam McCall #include "llvm/Support/Error.h" 67a443b3d1SSam McCall #include "llvm/Support/FormatVariadic.h" 68a443b3d1SSam McCall #include "llvm/Support/JSON.h" 69a443b3d1SSam McCall #include "llvm/Support/Program.h" 70b56b15edSSam McCall #include "llvm/Support/ScopedPrinter.h" 71a443b3d1SSam McCall #include "llvm/Support/raw_ostream.h" 72a443b3d1SSam McCall // Defines assets: HTMLLogger_{html_js,css} 73a443b3d1SSam McCall #include "HTMLLogger.inc" 74a443b3d1SSam McCall 75a443b3d1SSam McCall namespace clang::dataflow { 76a443b3d1SSam McCall namespace { 77a443b3d1SSam McCall 78a443b3d1SSam McCall // Render a graphviz graph specification to SVG using the `dot` tool. 79a443b3d1SSam McCall llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); 80a443b3d1SSam McCall 81a443b3d1SSam McCall using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; 82a443b3d1SSam McCall 83b56b15edSSam McCall // Recursively dumps Values/StorageLocations as JSON 84b56b15edSSam McCall class ModelDumper { 85b56b15edSSam McCall public: 86b56b15edSSam McCall ModelDumper(llvm::json::OStream &JOS, const Environment &Env) 87b56b15edSSam McCall : JOS(JOS), Env(Env) {} 88b56b15edSSam McCall 89b56b15edSSam McCall void dump(Value &V) { 90b56b15edSSam McCall JOS.attribute("value_id", llvm::to_string(&V)); 91b56b15edSSam McCall if (!Visited.insert(&V).second) 92b56b15edSSam McCall return; 93b56b15edSSam McCall 94b56b15edSSam McCall JOS.attribute("kind", debugString(V.getKind())); 95b56b15edSSam McCall 96b56b15edSSam McCall switch (V.getKind()) { 97b56b15edSSam McCall case Value::Kind::Integer: 98b56b15edSSam McCall case Value::Kind::TopBool: 99b56b15edSSam McCall case Value::Kind::AtomicBool: 100fc9821a8SSam McCall case Value::Kind::FormulaBool: 101b56b15edSSam McCall break; 102b56b15edSSam McCall case Value::Kind::Pointer: 103b56b15edSSam McCall JOS.attributeObject( 104b56b15edSSam McCall "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); 105b56b15edSSam McCall break; 106b56b15edSSam McCall } 107b56b15edSSam McCall 108b56b15edSSam McCall for (const auto& Prop : V.properties()) 109b56b15edSSam McCall JOS.attributeObject(("p:" + Prop.first()).str(), 110b56b15edSSam McCall [&] { dump(*Prop.second); }); 111b56b15edSSam McCall 112b56b15edSSam McCall // Running the SAT solver is expensive, but knowing which booleans are 113b56b15edSSam McCall // guaranteed true/false here is valuable and hard to determine by hand. 114b56b15edSSam McCall if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { 115fc9821a8SSam McCall JOS.attribute("formula", llvm::to_string(B->formula())); 116526c9b7eSmartinboehme JOS.attribute("truth", Env.proves(B->formula()) ? "true" 117526c9b7eSmartinboehme : Env.proves(Env.arena().makeNot(B->formula())) 118b56b15edSSam McCall ? "false" 119b56b15edSSam McCall : "unknown"); 120b56b15edSSam McCall } 121b56b15edSSam McCall } 122b56b15edSSam McCall void dump(const StorageLocation &L) { 123b56b15edSSam McCall JOS.attribute("location", llvm::to_string(&L)); 124b56b15edSSam McCall if (!Visited.insert(&L).second) 125b56b15edSSam McCall return; 126b56b15edSSam McCall 127b56b15edSSam McCall JOS.attribute("type", L.getType().getAsString()); 128e8fce958Smartinboehme if (!L.getType()->isRecordType()) 129b56b15edSSam McCall if (auto *V = Env.getValue(L)) 130b56b15edSSam McCall dump(*V); 131e791535bSKinuko Yasuda 132e791535bSKinuko Yasuda if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) { 133e791535bSKinuko Yasuda for (const auto &Child : RLoc->children()) 134e791535bSKinuko Yasuda JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] { 135e791535bSKinuko Yasuda if (Child.second) 136*564fd62aSFlorian Mayer dump(*Child.second); 137e791535bSKinuko Yasuda }); 13871f2ec2dSmartinboehme 13971f2ec2dSmartinboehme for (const auto &SyntheticField : RLoc->synthetic_fields()) 14071f2ec2dSmartinboehme JOS.attributeObject(("sf:" + SyntheticField.first()).str(), 14171f2ec2dSmartinboehme [&] { dump(*SyntheticField.second); }); 142e791535bSKinuko Yasuda } 143b56b15edSSam McCall } 144b56b15edSSam McCall 145b56b15edSSam McCall llvm::DenseSet<const void*> Visited; 146b56b15edSSam McCall llvm::json::OStream &JOS; 147b56b15edSSam McCall const Environment &Env; 148b56b15edSSam McCall }; 149b56b15edSSam McCall 150a443b3d1SSam McCall class HTMLLogger : public Logger { 151ed65ced2Smartinboehme struct Iteration { 152ed65ced2Smartinboehme const CFGBlock *Block; 153ed65ced2Smartinboehme unsigned Iter; 154ed65ced2Smartinboehme bool PostVisit; 1552be7c651Smartinboehme bool Converged; 156ed65ced2Smartinboehme }; 157ed65ced2Smartinboehme 158a443b3d1SSam McCall StreamFactory Streams; 159a443b3d1SSam McCall std::unique_ptr<llvm::raw_ostream> OS; 16082324bc9Smartinboehme std::string JSON; 16182324bc9Smartinboehme llvm::raw_string_ostream JStringStream{JSON}; 16282324bc9Smartinboehme llvm::json::OStream JOS{JStringStream, /*Indent=*/2}; 163a443b3d1SSam McCall 16459ff3adcSmartinboehme const AdornedCFG *ACFG; 165a443b3d1SSam McCall // Timeline of iterations of CFG block visitation. 166ed65ced2Smartinboehme std::vector<Iteration> Iters; 1672be7c651Smartinboehme // Indexes in `Iters` of the iterations for each block. 1682be7c651Smartinboehme llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters; 16982324bc9Smartinboehme // For a given block ID, did the block converge (on the last iteration)? 17082324bc9Smartinboehme llvm::BitVector BlockConverged; 171a443b3d1SSam McCall // The messages logged in the current context but not yet written. 172a443b3d1SSam McCall std::string ContextLogs; 173a443b3d1SSam McCall // The number of elements we have visited within the current CFG block. 174a443b3d1SSam McCall unsigned ElementIndex; 175a443b3d1SSam McCall 176a443b3d1SSam McCall public: 177a443b3d1SSam McCall explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} 17859ff3adcSmartinboehme void beginAnalysis(const AdornedCFG &ACFG, 179a443b3d1SSam McCall TypeErasedDataflowAnalysis &A) override { 180a443b3d1SSam McCall OS = Streams(); 18159ff3adcSmartinboehme this->ACFG = &ACFG; 182a443b3d1SSam McCall *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; 183a443b3d1SSam McCall 18459ff3adcSmartinboehme BlockConverged.resize(ACFG.getCFG().getNumBlockIDs()); 18582324bc9Smartinboehme 18659ff3adcSmartinboehme const auto &D = ACFG.getDecl(); 187a443b3d1SSam McCall const auto &SM = A.getASTContext().getSourceManager(); 188a443b3d1SSam McCall *OS << "<title>"; 189e6cd409fSMartin Braenne if (const auto *ND = dyn_cast<NamedDecl>(&D)) 190a443b3d1SSam McCall *OS << ND->getNameAsString() << " at "; 191e6cd409fSMartin Braenne *OS << SM.getFilename(D.getLocation()) << ":" 192e6cd409fSMartin Braenne << SM.getSpellingLineNumber(D.getLocation()); 193a443b3d1SSam McCall *OS << "</title>\n"; 194a443b3d1SSam McCall 195a443b3d1SSam McCall *OS << "<style>" << HTMLLogger_css << "</style>\n"; 196a443b3d1SSam McCall *OS << "<script>" << HTMLLogger_js << "</script>\n"; 197a443b3d1SSam McCall 198a443b3d1SSam McCall writeCode(); 19982324bc9Smartinboehme JOS.objectBegin(); 20082324bc9Smartinboehme JOS.attributeBegin("states"); 20182324bc9Smartinboehme JOS.objectBegin(); 202a443b3d1SSam McCall } 203a443b3d1SSam McCall // Between beginAnalysis() and endAnalysis() we write all the states for 204a443b3d1SSam McCall // particular analysis points into the `timeline` array. 205a443b3d1SSam McCall void endAnalysis() override { 20682324bc9Smartinboehme JOS.objectEnd(); 20782324bc9Smartinboehme JOS.attributeEnd(); 208a443b3d1SSam McCall 20982324bc9Smartinboehme JOS.attributeArray("timeline", [&] { 210a443b3d1SSam McCall for (const auto &E : Iters) { 21182324bc9Smartinboehme JOS.object([&] { 21282324bc9Smartinboehme JOS.attribute("block", blockID(E.Block->getBlockID())); 21382324bc9Smartinboehme JOS.attribute("iter", E.Iter); 21482324bc9Smartinboehme JOS.attribute("post_visit", E.PostVisit); 21582324bc9Smartinboehme JOS.attribute("converged", E.Converged); 216a443b3d1SSam McCall }); 217a443b3d1SSam McCall } 218a443b3d1SSam McCall }); 21982324bc9Smartinboehme JOS.attributeObject("cfg", [&] { 220a443b3d1SSam McCall for (const auto &E : BlockIters) 221a443b3d1SSam McCall writeBlock(*E.first, E.second); 222a443b3d1SSam McCall }); 223a443b3d1SSam McCall 22482324bc9Smartinboehme JOS.objectEnd(); 22582324bc9Smartinboehme 22682324bc9Smartinboehme writeCFG(); 22782324bc9Smartinboehme 22882324bc9Smartinboehme *OS << "<script>var HTMLLoggerData = \n"; 22982324bc9Smartinboehme *OS << JSON; 230a443b3d1SSam McCall *OS << ";\n</script>\n"; 231a443b3d1SSam McCall *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; 232a443b3d1SSam McCall } 233a443b3d1SSam McCall 234ed65ced2Smartinboehme void enterBlock(const CFGBlock &B, bool PostVisit) override { 2352be7c651Smartinboehme llvm::SmallVector<size_t> &BIter = BlockIters[&B]; 236ed65ced2Smartinboehme unsigned IterNum = BIter.size() + 1; 2372be7c651Smartinboehme BIter.push_back(Iters.size()); 2382be7c651Smartinboehme Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false}); 23982324bc9Smartinboehme if (!PostVisit) 24082324bc9Smartinboehme BlockConverged[B.getBlockID()] = false; 241a443b3d1SSam McCall ElementIndex = 0; 242a443b3d1SSam McCall } 243a443b3d1SSam McCall void enterElement(const CFGElement &E) override { 244a443b3d1SSam McCall ++ElementIndex; 245a443b3d1SSam McCall } 246a443b3d1SSam McCall 247a443b3d1SSam McCall static std::string blockID(unsigned Block) { 248a443b3d1SSam McCall return llvm::formatv("B{0}", Block); 249a443b3d1SSam McCall } 250a443b3d1SSam McCall static std::string eltID(unsigned Block, unsigned Element) { 251a443b3d1SSam McCall return llvm::formatv("B{0}.{1}", Block, Element); 252a443b3d1SSam McCall } 253a443b3d1SSam McCall static std::string iterID(unsigned Block, unsigned Iter) { 254a443b3d1SSam McCall return llvm::formatv("B{0}:{1}", Block, Iter); 255a443b3d1SSam McCall } 256a443b3d1SSam McCall static std::string elementIterID(unsigned Block, unsigned Iter, 257a443b3d1SSam McCall unsigned Element) { 258a443b3d1SSam McCall return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); 259a443b3d1SSam McCall } 260a443b3d1SSam McCall 261a443b3d1SSam McCall // Write the analysis state associated with a particular analysis point. 262a443b3d1SSam McCall // FIXME: this dump is fairly opaque. We should show: 263a443b3d1SSam McCall // - values associated with the current Stmt 264a443b3d1SSam McCall // - values associated with its children 265a443b3d1SSam McCall // - meaningful names for values 266a443b3d1SSam McCall // - which boolean values are implied true/false by the flow condition 267a443b3d1SSam McCall void recordState(TypeErasedDataflowAnalysisState &State) override { 268ed65ced2Smartinboehme unsigned Block = Iters.back().Block->getBlockID(); 269ed65ced2Smartinboehme unsigned Iter = Iters.back().Iter; 270ed65ced2Smartinboehme bool PostVisit = Iters.back().PostVisit; 27182324bc9Smartinboehme JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { 27282324bc9Smartinboehme JOS.attribute("block", blockID(Block)); 27382324bc9Smartinboehme JOS.attribute("iter", Iter); 27482324bc9Smartinboehme JOS.attribute("post_visit", PostVisit); 27582324bc9Smartinboehme JOS.attribute("element", ElementIndex); 276b56b15edSSam McCall 277b56b15edSSam McCall // If this state immediately follows an Expr, show its built-in model. 278b56b15edSSam McCall if (ElementIndex > 0) { 279b56b15edSSam McCall auto S = 280ed65ced2Smartinboehme Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>(); 281f76f6674SMartin Braenne if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) { 282f76f6674SMartin Braenne if (E->isPRValue()) { 283e8fce958Smartinboehme if (!E->getType()->isRecordType()) 284f76f6674SMartin Braenne if (auto *V = State.Env.getValue(*E)) 28582324bc9Smartinboehme JOS.attributeObject( 28682324bc9Smartinboehme "value", [&] { ModelDumper(JOS, State.Env).dump(*V); }); 287f76f6674SMartin Braenne } else { 288b244b6aeSMartin Braenne if (auto *Loc = State.Env.getStorageLocation(*E)) 28982324bc9Smartinboehme JOS.attributeObject( 29082324bc9Smartinboehme "value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); }); 291b56b15edSSam McCall } 292f76f6674SMartin Braenne } 293f76f6674SMartin Braenne } 294a443b3d1SSam McCall if (!ContextLogs.empty()) { 29582324bc9Smartinboehme JOS.attribute("logs", ContextLogs); 296a443b3d1SSam McCall ContextLogs.clear(); 297a443b3d1SSam McCall } 298a443b3d1SSam McCall { 299a443b3d1SSam McCall std::string BuiltinLattice; 300a443b3d1SSam McCall llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); 301a443b3d1SSam McCall State.Env.dump(BuiltinLatticeS); 30282324bc9Smartinboehme JOS.attribute("builtinLattice", BuiltinLattice); 303a443b3d1SSam McCall } 304a443b3d1SSam McCall }); 305a443b3d1SSam McCall } 30682324bc9Smartinboehme void blockConverged() override { 30782324bc9Smartinboehme Iters.back().Converged = true; 30882324bc9Smartinboehme BlockConverged[Iters.back().Block->getBlockID()] = true; 30982324bc9Smartinboehme } 310a443b3d1SSam McCall 311a443b3d1SSam McCall void logText(llvm::StringRef S) override { 312a443b3d1SSam McCall ContextLogs.append(S.begin(), S.end()); 313a443b3d1SSam McCall ContextLogs.push_back('\n'); 314a443b3d1SSam McCall } 315a443b3d1SSam McCall 316a443b3d1SSam McCall private: 317a443b3d1SSam McCall // Write the CFG block details. 318a443b3d1SSam McCall // Currently this is just the list of elements in execution order. 319a443b3d1SSam McCall // FIXME: an AST dump would be a useful view, too. 3202be7c651Smartinboehme void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) { 32182324bc9Smartinboehme JOS.attributeObject(blockID(B.getBlockID()), [&] { 32282324bc9Smartinboehme JOS.attributeArray("iters", [&] { 3232be7c651Smartinboehme for (size_t IterIdx : ItersForB) { 3242be7c651Smartinboehme const Iteration &Iter = Iters[IterIdx]; 32582324bc9Smartinboehme JOS.object([&] { 32682324bc9Smartinboehme JOS.attribute("iter", Iter.Iter); 32782324bc9Smartinboehme JOS.attribute("post_visit", Iter.PostVisit); 32882324bc9Smartinboehme JOS.attribute("converged", Iter.Converged); 329ed65ced2Smartinboehme }); 330ed65ced2Smartinboehme } 331ed65ced2Smartinboehme }); 33282324bc9Smartinboehme JOS.attributeArray("elements", [&] { 333a443b3d1SSam McCall for (const auto &Elt : B.Elements) { 334a443b3d1SSam McCall std::string Dump; 335a443b3d1SSam McCall llvm::raw_string_ostream DumpS(Dump); 336a443b3d1SSam McCall Elt.dumpToStream(DumpS); 33782324bc9Smartinboehme JOS.value(Dump); 338a443b3d1SSam McCall } 339a443b3d1SSam McCall }); 340a443b3d1SSam McCall }); 341a443b3d1SSam McCall } 342a443b3d1SSam McCall 343a443b3d1SSam McCall // Write the code of function being examined. 344a443b3d1SSam McCall // We want to overlay the code with <span>s that mark which BB particular 345a443b3d1SSam McCall // tokens are associated with, and even which BB element (so that clicking 346a443b3d1SSam McCall // can select the right element). 347a443b3d1SSam McCall void writeCode() { 34859ff3adcSmartinboehme const auto &AST = ACFG->getDecl().getASTContext(); 349a443b3d1SSam McCall bool Invalid = false; 350a443b3d1SSam McCall 351a443b3d1SSam McCall // Extract the source code from the original file. 352a443b3d1SSam McCall // Pretty-printing from the AST would probably be nicer (no macros or 353a443b3d1SSam McCall // indentation to worry about), but we need the boundaries of particular 354a443b3d1SSam McCall // AST nodes and the printer doesn't provide this. 355a443b3d1SSam McCall auto Range = clang::Lexer::makeFileCharRange( 35659ff3adcSmartinboehme CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()), 357a443b3d1SSam McCall AST.getSourceManager(), AST.getLangOpts()); 358a443b3d1SSam McCall if (Range.isInvalid()) 359a443b3d1SSam McCall return; 360a443b3d1SSam McCall llvm::StringRef Code = clang::Lexer::getSourceText( 361a443b3d1SSam McCall Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); 362a443b3d1SSam McCall if (Invalid) 363a443b3d1SSam McCall return; 364a443b3d1SSam McCall 365a443b3d1SSam McCall // TokenInfo stores the BB and set of elements that a token is part of. 366a443b3d1SSam McCall struct TokenInfo { 36746a56931SKazu Hirata enum : unsigned { Missing = static_cast<unsigned>(-1) }; 36811dfb3cbSAaron Ballman 369a443b3d1SSam McCall // The basic block this is part of. 370a443b3d1SSam McCall // This is the BB of the stmt with the smallest containing range. 371a443b3d1SSam McCall unsigned BB = Missing; 372a443b3d1SSam McCall unsigned BBPriority = 0; 373a443b3d1SSam McCall // The most specific stmt this is part of (smallest range). 374a443b3d1SSam McCall unsigned Elt = Missing; 375a443b3d1SSam McCall unsigned EltPriority = 0; 376a443b3d1SSam McCall // All stmts this is part of. 377a443b3d1SSam McCall SmallVector<unsigned> Elts; 378a443b3d1SSam McCall 379a443b3d1SSam McCall // Mark this token as being part of BB.Elt. 380a443b3d1SSam McCall // RangeLen is the character length of the element's range, used to 381a443b3d1SSam McCall // distinguish inner vs outer statements. 382a443b3d1SSam McCall // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". 383a443b3d1SSam McCall // However "a" has a smaller range, so is more specific. Clicking on the 384a443b3d1SSam McCall // token "a" should select the stmt "a". 385a443b3d1SSam McCall void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { 386a443b3d1SSam McCall // A worse BB (larger range) => ignore. 387a443b3d1SSam McCall if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) 388a443b3d1SSam McCall return; 389a443b3d1SSam McCall if (BB != this->BB) { 390a443b3d1SSam McCall this->BB = BB; 391a443b3d1SSam McCall Elts.clear(); 392a443b3d1SSam McCall BBPriority = RangeLen; 393a443b3d1SSam McCall } 394a443b3d1SSam McCall BBPriority = std::min(BBPriority, RangeLen); 395a443b3d1SSam McCall Elts.push_back(Elt); 396a443b3d1SSam McCall if (this->Elt == Missing || EltPriority > RangeLen) 397a443b3d1SSam McCall this->Elt = Elt; 398a443b3d1SSam McCall } 399a443b3d1SSam McCall bool operator==(const TokenInfo &Other) const { 400a443b3d1SSam McCall return std::tie(BB, Elt, Elts) == 401a443b3d1SSam McCall std::tie(Other.BB, Other.Elt, Other.Elts); 402a443b3d1SSam McCall } 403a443b3d1SSam McCall // Write the attributes for the <span> on this token. 404a443b3d1SSam McCall void write(llvm::raw_ostream &OS) const { 405a443b3d1SSam McCall OS << "class='c"; 406a443b3d1SSam McCall if (BB != Missing) 407a443b3d1SSam McCall OS << " " << blockID(BB); 408a443b3d1SSam McCall for (unsigned Elt : Elts) 409a443b3d1SSam McCall OS << " " << eltID(BB, Elt); 410a443b3d1SSam McCall OS << "'"; 411a443b3d1SSam McCall 412a443b3d1SSam McCall if (Elt != Missing) 413a443b3d1SSam McCall OS << " data-elt='" << eltID(BB, Elt) << "'"; 414a443b3d1SSam McCall if (BB != Missing) 415a443b3d1SSam McCall OS << " data-bb='" << blockID(BB) << "'"; 416a443b3d1SSam McCall } 417a443b3d1SSam McCall }; 418a443b3d1SSam McCall 419a443b3d1SSam McCall // Construct one TokenInfo per character in a flat array. 420a443b3d1SSam McCall // This is inefficient (chars in a token all have the same info) but simple. 421a443b3d1SSam McCall std::vector<TokenInfo> State(Code.size()); 42259ff3adcSmartinboehme for (const auto *Block : ACFG->getCFG()) { 423a443b3d1SSam McCall unsigned EltIndex = 0; 424a443b3d1SSam McCall for (const auto& Elt : *Block) { 425a443b3d1SSam McCall ++EltIndex; 426a443b3d1SSam McCall if (const auto S = Elt.getAs<CFGStmt>()) { 427a443b3d1SSam McCall auto EltRange = clang::Lexer::makeFileCharRange( 428a443b3d1SSam McCall CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), 429a443b3d1SSam McCall AST.getSourceManager(), AST.getLangOpts()); 430a443b3d1SSam McCall if (EltRange.isInvalid()) 431a443b3d1SSam McCall continue; 432a443b3d1SSam McCall if (EltRange.getBegin() < Range.getBegin() || 433a443b3d1SSam McCall EltRange.getEnd() >= Range.getEnd() || 434a443b3d1SSam McCall EltRange.getEnd() < Range.getBegin() || 435a443b3d1SSam McCall EltRange.getEnd() >= Range.getEnd()) 436a443b3d1SSam McCall continue; 437a443b3d1SSam McCall unsigned Off = EltRange.getBegin().getRawEncoding() - 438a443b3d1SSam McCall Range.getBegin().getRawEncoding(); 439a443b3d1SSam McCall unsigned Len = EltRange.getEnd().getRawEncoding() - 440a443b3d1SSam McCall EltRange.getBegin().getRawEncoding(); 441a443b3d1SSam McCall for (unsigned I = 0; I < Len; ++I) 442a443b3d1SSam McCall State[Off + I].assign(Block->getBlockID(), EltIndex, Len); 443a443b3d1SSam McCall } 444a443b3d1SSam McCall } 445a443b3d1SSam McCall } 446a443b3d1SSam McCall 447a443b3d1SSam McCall // Finally, write the code with the correct <span>s. 448a443b3d1SSam McCall unsigned Line = 449a443b3d1SSam McCall AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); 450a443b3d1SSam McCall *OS << "<template data-copy='code'>\n"; 451a443b3d1SSam McCall *OS << "<code class='filename'>"; 452a443b3d1SSam McCall llvm::printHTMLEscaped( 453a443b3d1SSam McCall llvm::sys::path::filename( 454a443b3d1SSam McCall AST.getSourceManager().getFilename(Range.getBegin())), 455a443b3d1SSam McCall *OS); 456a443b3d1SSam McCall *OS << "</code>"; 457a443b3d1SSam McCall *OS << "<code class='line' data-line='" << Line++ << "'>"; 458a443b3d1SSam McCall for (unsigned I = 0; I < Code.size(); ++I) { 459a443b3d1SSam McCall // Don't actually write a <span> around each character, only break spans 460a443b3d1SSam McCall // when the TokenInfo changes. 461a443b3d1SSam McCall bool NeedOpen = I == 0 || !(State[I] == State[I-1]); 462a443b3d1SSam McCall bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]); 463a443b3d1SSam McCall if (NeedOpen) { 464a443b3d1SSam McCall *OS << "<span "; 465a443b3d1SSam McCall State[I].write(*OS); 466a443b3d1SSam McCall *OS << ">"; 467a443b3d1SSam McCall } 468a443b3d1SSam McCall if (Code[I] == '\n') 469a443b3d1SSam McCall *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>"; 470a443b3d1SSam McCall else 471a443b3d1SSam McCall llvm::printHTMLEscaped(Code.substr(I, 1), *OS); 472a443b3d1SSam McCall if (NeedClose) *OS << "</span>"; 473a443b3d1SSam McCall } 474a443b3d1SSam McCall *OS << "</code>\n"; 475a443b3d1SSam McCall *OS << "</template>"; 476a443b3d1SSam McCall } 477a443b3d1SSam McCall 478a443b3d1SSam McCall // Write the CFG diagram, a graph of basic blocks. 479a443b3d1SSam McCall // Laying out graphs is hard, so we construct a graphviz description and shell 480a443b3d1SSam McCall // out to `dot` to turn it into an SVG. 481a443b3d1SSam McCall void writeCFG() { 482a443b3d1SSam McCall *OS << "<template data-copy='cfg'>\n"; 48359ff3adcSmartinboehme if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG()))) 484a443b3d1SSam McCall *OS << *SVG; 485a443b3d1SSam McCall else 486a443b3d1SSam McCall *OS << "Can't draw CFG: " << toString(SVG.takeError()); 487a443b3d1SSam McCall *OS << "</template>\n"; 488a443b3d1SSam McCall } 489a443b3d1SSam McCall 490a443b3d1SSam McCall // Produce a graphviz description of a CFG. 49182324bc9Smartinboehme std::string buildCFGDot(const clang::CFG &CFG) { 492a443b3d1SSam McCall std::string Graph; 493a443b3d1SSam McCall llvm::raw_string_ostream GraphS(Graph); 494a443b3d1SSam McCall // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. 495a443b3d1SSam McCall GraphS << R"(digraph { 496a443b3d1SSam McCall tooltip=" " 497a443b3d1SSam McCall node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] 498a443b3d1SSam McCall edge[tooltip = " "] 499a443b3d1SSam McCall )"; 50082324bc9Smartinboehme for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) { 50182324bc9Smartinboehme std::string Name = blockID(I); 50282324bc9Smartinboehme // Rightwards arrow, vertical line 503a11ab139Smartinboehme const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c"; 50482324bc9Smartinboehme if (BlockConverged[I]) 50582324bc9Smartinboehme Name += ConvergenceMarker; 50682324bc9Smartinboehme GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\"" 50782324bc9Smartinboehme << Name << "\"]\n"; 50882324bc9Smartinboehme } 509a443b3d1SSam McCall for (const auto *Block : CFG) { 510a443b3d1SSam McCall for (const auto &Succ : Block->succs()) { 511771d7d71SMartin Braenne if (Succ.getReachableBlock()) 512a443b3d1SSam McCall GraphS << " " << blockID(Block->getBlockID()) << " -> " 513a443b3d1SSam McCall << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; 514a443b3d1SSam McCall } 515a443b3d1SSam McCall } 516a443b3d1SSam McCall GraphS << "}\n"; 517a443b3d1SSam McCall return Graph; 518a443b3d1SSam McCall } 519a443b3d1SSam McCall }; 520a443b3d1SSam McCall 521a443b3d1SSam McCall // Nothing interesting here, just subprocess/temp-file plumbing. 522a443b3d1SSam McCall llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { 5230304aa25SSam McCall std::string DotPath; 5240304aa25SSam McCall if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) 5250304aa25SSam McCall DotPath = FromEnv; 5260304aa25SSam McCall else { 5270304aa25SSam McCall auto FromPath = llvm::sys::findProgramByName("dot"); 5280304aa25SSam McCall if (!FromPath) 5290304aa25SSam McCall return llvm::createStringError(FromPath.getError(), 5300304aa25SSam McCall "'dot' not found on PATH"); 5310304aa25SSam McCall DotPath = FromPath.get(); 5320304aa25SSam McCall } 533a443b3d1SSam McCall 534a443b3d1SSam McCall // Create input and output files for `dot` subprocess. 535a443b3d1SSam McCall // (We create the output file as empty, to reserve the temp filename). 536a443b3d1SSam McCall llvm::SmallString<256> Input, Output; 537a443b3d1SSam McCall int InputFD; 538a443b3d1SSam McCall if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, 539a443b3d1SSam McCall Input)) 540a443b3d1SSam McCall return llvm::createStringError(EC, "failed to create `dot` temp input"); 541a443b3d1SSam McCall llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; 542a443b3d1SSam McCall auto DeleteInput = 543a443b3d1SSam McCall llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); 544a443b3d1SSam McCall if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) 545a443b3d1SSam McCall return llvm::createStringError(EC, "failed to create `dot` temp output"); 546a443b3d1SSam McCall auto DeleteOutput = 547a443b3d1SSam McCall llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); 548a443b3d1SSam McCall 549a443b3d1SSam McCall std::vector<std::optional<llvm::StringRef>> Redirects = { 550a443b3d1SSam McCall Input, Output, 551a443b3d1SSam McCall /*stderr=*/std::nullopt}; 552a443b3d1SSam McCall std::string ErrMsg; 553a443b3d1SSam McCall int Code = llvm::sys::ExecuteAndWait( 5540304aa25SSam McCall DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, 555a443b3d1SSam McCall /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); 556a443b3d1SSam McCall if (!ErrMsg.empty()) 557a443b3d1SSam McCall return llvm::createStringError(llvm::inconvertibleErrorCode(), 558a443b3d1SSam McCall "'dot' failed: " + ErrMsg); 559a443b3d1SSam McCall if (Code != 0) 560a443b3d1SSam McCall return llvm::createStringError(llvm::inconvertibleErrorCode(), 561a443b3d1SSam McCall "'dot' failed (" + llvm::Twine(Code) + ")"); 562a443b3d1SSam McCall 563a443b3d1SSam McCall auto Buf = llvm::MemoryBuffer::getFile(Output); 564a443b3d1SSam McCall if (!Buf) 565a443b3d1SSam McCall return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); 566a443b3d1SSam McCall 567a443b3d1SSam McCall // Output has <?xml> prefix we don't want. Skip to <svg> tag. 568a443b3d1SSam McCall llvm::StringRef Result = Buf.get()->getBuffer(); 569a443b3d1SSam McCall auto Pos = Result.find("<svg"); 570a443b3d1SSam McCall if (Pos == llvm::StringRef::npos) 571a443b3d1SSam McCall return llvm::createStringError(llvm::inconvertibleErrorCode(), 572a443b3d1SSam McCall "Can't find <svg> tag in `dot` output"); 573a443b3d1SSam McCall return Result.substr(Pos).str(); 574a443b3d1SSam McCall } 575a443b3d1SSam McCall 576a443b3d1SSam McCall } // namespace 577a443b3d1SSam McCall 578a443b3d1SSam McCall std::unique_ptr<Logger> 579a443b3d1SSam McCall Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { 580a443b3d1SSam McCall return std::make_unique<HTMLLogger>(std::move(Streams)); 581a443b3d1SSam McCall } 582a443b3d1SSam McCall 583a443b3d1SSam McCall } // namespace clang::dataflow 584