1 //===-- HTMLLogger.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the HTML logger. Given a directory dir/, we write 10 // dir/0.html for the first analysis, etc. 11 // These files contain a visualization that allows inspecting the CFG and the 12 // state of the analysis at each point. 13 // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded 14 // so each output file is self-contained. 15 // 16 // VIEWS 17 // 18 // The timeline and function view are always shown. These allow selecting basic 19 // blocks, statements within them, and processing iterations (BBs are visited 20 // multiple times when e.g. loops are involved). 21 // These are written directly into the HTML body. 22 // 23 // There are also listings of particular basic blocks, and dumps of the state 24 // at particular analysis points (i.e. BB2 iteration 3 statement 2). 25 // These are only shown when the relevant BB/analysis point is *selected*. 26 // 27 // DATA AND TEMPLATES 28 // 29 // The HTML proper is mostly static. 30 // The analysis data is in a JSON object HTMLLoggerData which is embedded as 31 // a <script> in the <head>. 32 // This gets rendered into DOM by a simple template processor which substitutes 33 // the data into <template> tags embedded in the HTML. (see inflate() in JS). 34 // 35 // SELECTION 36 // 37 // This is the only real interactive mechanism. 38 // 39 // At any given time, there are several named selections, e.g.: 40 // bb: B2 (basic block 0 is selected) 41 // elt: B2.4 (statement 4 is selected) 42 // iter: B2:1 (iteration 1 of the basic block is selected) 43 // hover: B3 (hovering over basic block 3) 44 // 45 // The selection is updated by mouse events: hover by moving the mouse and 46 // others by clicking. Elements that are click targets generally have attributes 47 // (id or data-foo) that define what they should select. 48 // See watchSelection() in JS for the exact logic. 49 // 50 // When the "bb" selection is set to "B2": 51 // - sections <section data-selection="bb"> get shown 52 // - templates under such sections get re-rendered 53 // - elements with class/id "B2" get class "bb-select" 54 // 55 //===----------------------------------------------------------------------===// 56 57 #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 58 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 59 #include "clang/Analysis/FlowSensitive/Logger.h" 60 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 61 #include "clang/Analysis/FlowSensitive/Value.h" 62 #include "clang/Basic/SourceManager.h" 63 #include "clang/Lex/Lexer.h" 64 #include "llvm/ADT/DenseMap.h" 65 #include "llvm/ADT/ScopeExit.h" 66 #include "llvm/Support/Error.h" 67 #include "llvm/Support/FormatVariadic.h" 68 #include "llvm/Support/JSON.h" 69 #include "llvm/Support/Program.h" 70 #include "llvm/Support/ScopedPrinter.h" 71 #include "llvm/Support/raw_ostream.h" 72 // Defines assets: HTMLLogger_{html_js,css} 73 #include "HTMLLogger.inc" 74 75 namespace clang::dataflow { 76 namespace { 77 78 // Render a graphviz graph specification to SVG using the `dot` tool. 79 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); 80 81 using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; 82 83 // Recursively dumps Values/StorageLocations as JSON 84 class ModelDumper { 85 public: 86 ModelDumper(llvm::json::OStream &JOS, const Environment &Env) 87 : JOS(JOS), Env(Env) {} 88 89 void dump(Value &V) { 90 JOS.attribute("value_id", llvm::to_string(&V)); 91 if (!Visited.insert(&V).second) 92 return; 93 94 JOS.attribute("kind", debugString(V.getKind())); 95 96 switch (V.getKind()) { 97 case Value::Kind::Integer: 98 case Value::Kind::TopBool: 99 case Value::Kind::AtomicBool: 100 case Value::Kind::FormulaBool: 101 break; 102 case Value::Kind::Pointer: 103 JOS.attributeObject( 104 "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); 105 break; 106 } 107 108 for (const auto& Prop : V.properties()) 109 JOS.attributeObject(("p:" + Prop.first()).str(), 110 [&] { dump(*Prop.second); }); 111 112 // Running the SAT solver is expensive, but knowing which booleans are 113 // guaranteed true/false here is valuable and hard to determine by hand. 114 if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { 115 JOS.attribute("formula", llvm::to_string(B->formula())); 116 JOS.attribute("truth", Env.proves(B->formula()) ? "true" 117 : Env.proves(Env.arena().makeNot(B->formula())) 118 ? "false" 119 : "unknown"); 120 } 121 } 122 void dump(const StorageLocation &L) { 123 JOS.attribute("location", llvm::to_string(&L)); 124 if (!Visited.insert(&L).second) 125 return; 126 127 JOS.attribute("type", L.getType().getAsString()); 128 if (!L.getType()->isRecordType()) 129 if (auto *V = Env.getValue(L)) 130 dump(*V); 131 132 if (auto *RLoc = dyn_cast<RecordStorageLocation>(&L)) { 133 for (const auto &Child : RLoc->children()) 134 JOS.attributeObject("f:" + Child.first->getNameAsString(), [&] { 135 if (Child.second) 136 dump(*Child.second); 137 }); 138 139 for (const auto &SyntheticField : RLoc->synthetic_fields()) 140 JOS.attributeObject(("sf:" + SyntheticField.first()).str(), 141 [&] { dump(*SyntheticField.second); }); 142 } 143 } 144 145 llvm::DenseSet<const void*> Visited; 146 llvm::json::OStream &JOS; 147 const Environment &Env; 148 }; 149 150 class HTMLLogger : public Logger { 151 struct Iteration { 152 const CFGBlock *Block; 153 unsigned Iter; 154 bool PostVisit; 155 bool Converged; 156 }; 157 158 StreamFactory Streams; 159 std::unique_ptr<llvm::raw_ostream> OS; 160 std::string JSON; 161 llvm::raw_string_ostream JStringStream{JSON}; 162 llvm::json::OStream JOS{JStringStream, /*Indent=*/2}; 163 164 const AdornedCFG *ACFG; 165 // Timeline of iterations of CFG block visitation. 166 std::vector<Iteration> Iters; 167 // Indexes in `Iters` of the iterations for each block. 168 llvm::DenseMap<const CFGBlock *, llvm::SmallVector<size_t>> BlockIters; 169 // For a given block ID, did the block converge (on the last iteration)? 170 llvm::BitVector BlockConverged; 171 // The messages logged in the current context but not yet written. 172 std::string ContextLogs; 173 // The number of elements we have visited within the current CFG block. 174 unsigned ElementIndex; 175 176 public: 177 explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} 178 void beginAnalysis(const AdornedCFG &ACFG, 179 TypeErasedDataflowAnalysis &A) override { 180 OS = Streams(); 181 this->ACFG = &ACFG; 182 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; 183 184 BlockConverged.resize(ACFG.getCFG().getNumBlockIDs()); 185 186 const auto &D = ACFG.getDecl(); 187 const auto &SM = A.getASTContext().getSourceManager(); 188 *OS << "<title>"; 189 if (const auto *ND = dyn_cast<NamedDecl>(&D)) 190 *OS << ND->getNameAsString() << " at "; 191 *OS << SM.getFilename(D.getLocation()) << ":" 192 << SM.getSpellingLineNumber(D.getLocation()); 193 *OS << "</title>\n"; 194 195 *OS << "<style>" << HTMLLogger_css << "</style>\n"; 196 *OS << "<script>" << HTMLLogger_js << "</script>\n"; 197 198 writeCode(); 199 JOS.objectBegin(); 200 JOS.attributeBegin("states"); 201 JOS.objectBegin(); 202 } 203 // Between beginAnalysis() and endAnalysis() we write all the states for 204 // particular analysis points into the `timeline` array. 205 void endAnalysis() override { 206 JOS.objectEnd(); 207 JOS.attributeEnd(); 208 209 JOS.attributeArray("timeline", [&] { 210 for (const auto &E : Iters) { 211 JOS.object([&] { 212 JOS.attribute("block", blockID(E.Block->getBlockID())); 213 JOS.attribute("iter", E.Iter); 214 JOS.attribute("post_visit", E.PostVisit); 215 JOS.attribute("converged", E.Converged); 216 }); 217 } 218 }); 219 JOS.attributeObject("cfg", [&] { 220 for (const auto &E : BlockIters) 221 writeBlock(*E.first, E.second); 222 }); 223 224 JOS.objectEnd(); 225 226 writeCFG(); 227 228 *OS << "<script>var HTMLLoggerData = \n"; 229 *OS << JSON; 230 *OS << ";\n</script>\n"; 231 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; 232 } 233 234 void enterBlock(const CFGBlock &B, bool PostVisit) override { 235 llvm::SmallVector<size_t> &BIter = BlockIters[&B]; 236 unsigned IterNum = BIter.size() + 1; 237 BIter.push_back(Iters.size()); 238 Iters.push_back({&B, IterNum, PostVisit, /*Converged=*/false}); 239 if (!PostVisit) 240 BlockConverged[B.getBlockID()] = false; 241 ElementIndex = 0; 242 } 243 void enterElement(const CFGElement &E) override { 244 ++ElementIndex; 245 } 246 247 static std::string blockID(unsigned Block) { 248 return llvm::formatv("B{0}", Block); 249 } 250 static std::string eltID(unsigned Block, unsigned Element) { 251 return llvm::formatv("B{0}.{1}", Block, Element); 252 } 253 static std::string iterID(unsigned Block, unsigned Iter) { 254 return llvm::formatv("B{0}:{1}", Block, Iter); 255 } 256 static std::string elementIterID(unsigned Block, unsigned Iter, 257 unsigned Element) { 258 return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); 259 } 260 261 // Write the analysis state associated with a particular analysis point. 262 // FIXME: this dump is fairly opaque. We should show: 263 // - values associated with the current Stmt 264 // - values associated with its children 265 // - meaningful names for values 266 // - which boolean values are implied true/false by the flow condition 267 void recordState(TypeErasedDataflowAnalysisState &State) override { 268 unsigned Block = Iters.back().Block->getBlockID(); 269 unsigned Iter = Iters.back().Iter; 270 bool PostVisit = Iters.back().PostVisit; 271 JOS.attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { 272 JOS.attribute("block", blockID(Block)); 273 JOS.attribute("iter", Iter); 274 JOS.attribute("post_visit", PostVisit); 275 JOS.attribute("element", ElementIndex); 276 277 // If this state immediately follows an Expr, show its built-in model. 278 if (ElementIndex > 0) { 279 auto S = 280 Iters.back().Block->Elements[ElementIndex - 1].getAs<CFGStmt>(); 281 if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) { 282 if (E->isPRValue()) { 283 if (!E->getType()->isRecordType()) 284 if (auto *V = State.Env.getValue(*E)) 285 JOS.attributeObject( 286 "value", [&] { ModelDumper(JOS, State.Env).dump(*V); }); 287 } else { 288 if (auto *Loc = State.Env.getStorageLocation(*E)) 289 JOS.attributeObject( 290 "value", [&] { ModelDumper(JOS, State.Env).dump(*Loc); }); 291 } 292 } 293 } 294 if (!ContextLogs.empty()) { 295 JOS.attribute("logs", ContextLogs); 296 ContextLogs.clear(); 297 } 298 { 299 std::string BuiltinLattice; 300 llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); 301 State.Env.dump(BuiltinLatticeS); 302 JOS.attribute("builtinLattice", BuiltinLattice); 303 } 304 }); 305 } 306 void blockConverged() override { 307 Iters.back().Converged = true; 308 BlockConverged[Iters.back().Block->getBlockID()] = true; 309 } 310 311 void logText(llvm::StringRef S) override { 312 ContextLogs.append(S.begin(), S.end()); 313 ContextLogs.push_back('\n'); 314 } 315 316 private: 317 // Write the CFG block details. 318 // Currently this is just the list of elements in execution order. 319 // FIXME: an AST dump would be a useful view, too. 320 void writeBlock(const CFGBlock &B, llvm::ArrayRef<size_t> ItersForB) { 321 JOS.attributeObject(blockID(B.getBlockID()), [&] { 322 JOS.attributeArray("iters", [&] { 323 for (size_t IterIdx : ItersForB) { 324 const Iteration &Iter = Iters[IterIdx]; 325 JOS.object([&] { 326 JOS.attribute("iter", Iter.Iter); 327 JOS.attribute("post_visit", Iter.PostVisit); 328 JOS.attribute("converged", Iter.Converged); 329 }); 330 } 331 }); 332 JOS.attributeArray("elements", [&] { 333 for (const auto &Elt : B.Elements) { 334 std::string Dump; 335 llvm::raw_string_ostream DumpS(Dump); 336 Elt.dumpToStream(DumpS); 337 JOS.value(Dump); 338 } 339 }); 340 }); 341 } 342 343 // Write the code of function being examined. 344 // We want to overlay the code with <span>s that mark which BB particular 345 // tokens are associated with, and even which BB element (so that clicking 346 // can select the right element). 347 void writeCode() { 348 const auto &AST = ACFG->getDecl().getASTContext(); 349 bool Invalid = false; 350 351 // Extract the source code from the original file. 352 // Pretty-printing from the AST would probably be nicer (no macros or 353 // indentation to worry about), but we need the boundaries of particular 354 // AST nodes and the printer doesn't provide this. 355 auto Range = clang::Lexer::makeFileCharRange( 356 CharSourceRange::getTokenRange(ACFG->getDecl().getSourceRange()), 357 AST.getSourceManager(), AST.getLangOpts()); 358 if (Range.isInvalid()) 359 return; 360 llvm::StringRef Code = clang::Lexer::getSourceText( 361 Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); 362 if (Invalid) 363 return; 364 365 // TokenInfo stores the BB and set of elements that a token is part of. 366 struct TokenInfo { 367 enum : unsigned { Missing = static_cast<unsigned>(-1) }; 368 369 // The basic block this is part of. 370 // This is the BB of the stmt with the smallest containing range. 371 unsigned BB = Missing; 372 unsigned BBPriority = 0; 373 // The most specific stmt this is part of (smallest range). 374 unsigned Elt = Missing; 375 unsigned EltPriority = 0; 376 // All stmts this is part of. 377 SmallVector<unsigned> Elts; 378 379 // Mark this token as being part of BB.Elt. 380 // RangeLen is the character length of the element's range, used to 381 // distinguish inner vs outer statements. 382 // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". 383 // However "a" has a smaller range, so is more specific. Clicking on the 384 // token "a" should select the stmt "a". 385 void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { 386 // A worse BB (larger range) => ignore. 387 if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) 388 return; 389 if (BB != this->BB) { 390 this->BB = BB; 391 Elts.clear(); 392 BBPriority = RangeLen; 393 } 394 BBPriority = std::min(BBPriority, RangeLen); 395 Elts.push_back(Elt); 396 if (this->Elt == Missing || EltPriority > RangeLen) 397 this->Elt = Elt; 398 } 399 bool operator==(const TokenInfo &Other) const { 400 return std::tie(BB, Elt, Elts) == 401 std::tie(Other.BB, Other.Elt, Other.Elts); 402 } 403 // Write the attributes for the <span> on this token. 404 void write(llvm::raw_ostream &OS) const { 405 OS << "class='c"; 406 if (BB != Missing) 407 OS << " " << blockID(BB); 408 for (unsigned Elt : Elts) 409 OS << " " << eltID(BB, Elt); 410 OS << "'"; 411 412 if (Elt != Missing) 413 OS << " data-elt='" << eltID(BB, Elt) << "'"; 414 if (BB != Missing) 415 OS << " data-bb='" << blockID(BB) << "'"; 416 } 417 }; 418 419 // Construct one TokenInfo per character in a flat array. 420 // This is inefficient (chars in a token all have the same info) but simple. 421 std::vector<TokenInfo> State(Code.size()); 422 for (const auto *Block : ACFG->getCFG()) { 423 unsigned EltIndex = 0; 424 for (const auto& Elt : *Block) { 425 ++EltIndex; 426 if (const auto S = Elt.getAs<CFGStmt>()) { 427 auto EltRange = clang::Lexer::makeFileCharRange( 428 CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), 429 AST.getSourceManager(), AST.getLangOpts()); 430 if (EltRange.isInvalid()) 431 continue; 432 if (EltRange.getBegin() < Range.getBegin() || 433 EltRange.getEnd() >= Range.getEnd() || 434 EltRange.getEnd() < Range.getBegin() || 435 EltRange.getEnd() >= Range.getEnd()) 436 continue; 437 unsigned Off = EltRange.getBegin().getRawEncoding() - 438 Range.getBegin().getRawEncoding(); 439 unsigned Len = EltRange.getEnd().getRawEncoding() - 440 EltRange.getBegin().getRawEncoding(); 441 for (unsigned I = 0; I < Len; ++I) 442 State[Off + I].assign(Block->getBlockID(), EltIndex, Len); 443 } 444 } 445 } 446 447 // Finally, write the code with the correct <span>s. 448 unsigned Line = 449 AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); 450 *OS << "<template data-copy='code'>\n"; 451 *OS << "<code class='filename'>"; 452 llvm::printHTMLEscaped( 453 llvm::sys::path::filename( 454 AST.getSourceManager().getFilename(Range.getBegin())), 455 *OS); 456 *OS << "</code>"; 457 *OS << "<code class='line' data-line='" << Line++ << "'>"; 458 for (unsigned I = 0; I < Code.size(); ++I) { 459 // Don't actually write a <span> around each character, only break spans 460 // when the TokenInfo changes. 461 bool NeedOpen = I == 0 || !(State[I] == State[I-1]); 462 bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]); 463 if (NeedOpen) { 464 *OS << "<span "; 465 State[I].write(*OS); 466 *OS << ">"; 467 } 468 if (Code[I] == '\n') 469 *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>"; 470 else 471 llvm::printHTMLEscaped(Code.substr(I, 1), *OS); 472 if (NeedClose) *OS << "</span>"; 473 } 474 *OS << "</code>\n"; 475 *OS << "</template>"; 476 } 477 478 // Write the CFG diagram, a graph of basic blocks. 479 // Laying out graphs is hard, so we construct a graphviz description and shell 480 // out to `dot` to turn it into an SVG. 481 void writeCFG() { 482 *OS << "<template data-copy='cfg'>\n"; 483 if (auto SVG = renderSVG(buildCFGDot(ACFG->getCFG()))) 484 *OS << *SVG; 485 else 486 *OS << "Can't draw CFG: " << toString(SVG.takeError()); 487 *OS << "</template>\n"; 488 } 489 490 // Produce a graphviz description of a CFG. 491 std::string buildCFGDot(const clang::CFG &CFG) { 492 std::string Graph; 493 llvm::raw_string_ostream GraphS(Graph); 494 // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. 495 GraphS << R"(digraph { 496 tooltip=" " 497 node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] 498 edge[tooltip = " "] 499 )"; 500 for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) { 501 std::string Name = blockID(I); 502 // Rightwards arrow, vertical line 503 const char *ConvergenceMarker = (const char *)u8"\\n\u2192\u007c"; 504 if (BlockConverged[I]) 505 Name += ConvergenceMarker; 506 GraphS << " " << blockID(I) << " [id=" << blockID(I) << " label=\"" 507 << Name << "\"]\n"; 508 } 509 for (const auto *Block : CFG) { 510 for (const auto &Succ : Block->succs()) { 511 if (Succ.getReachableBlock()) 512 GraphS << " " << blockID(Block->getBlockID()) << " -> " 513 << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; 514 } 515 } 516 GraphS << "}\n"; 517 return Graph; 518 } 519 }; 520 521 // Nothing interesting here, just subprocess/temp-file plumbing. 522 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { 523 std::string DotPath; 524 if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) 525 DotPath = FromEnv; 526 else { 527 auto FromPath = llvm::sys::findProgramByName("dot"); 528 if (!FromPath) 529 return llvm::createStringError(FromPath.getError(), 530 "'dot' not found on PATH"); 531 DotPath = FromPath.get(); 532 } 533 534 // Create input and output files for `dot` subprocess. 535 // (We create the output file as empty, to reserve the temp filename). 536 llvm::SmallString<256> Input, Output; 537 int InputFD; 538 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, 539 Input)) 540 return llvm::createStringError(EC, "failed to create `dot` temp input"); 541 llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; 542 auto DeleteInput = 543 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); 544 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) 545 return llvm::createStringError(EC, "failed to create `dot` temp output"); 546 auto DeleteOutput = 547 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); 548 549 std::vector<std::optional<llvm::StringRef>> Redirects = { 550 Input, Output, 551 /*stderr=*/std::nullopt}; 552 std::string ErrMsg; 553 int Code = llvm::sys::ExecuteAndWait( 554 DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, 555 /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); 556 if (!ErrMsg.empty()) 557 return llvm::createStringError(llvm::inconvertibleErrorCode(), 558 "'dot' failed: " + ErrMsg); 559 if (Code != 0) 560 return llvm::createStringError(llvm::inconvertibleErrorCode(), 561 "'dot' failed (" + llvm::Twine(Code) + ")"); 562 563 auto Buf = llvm::MemoryBuffer::getFile(Output); 564 if (!Buf) 565 return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); 566 567 // Output has <?xml> prefix we don't want. Skip to <svg> tag. 568 llvm::StringRef Result = Buf.get()->getBuffer(); 569 auto Pos = Result.find("<svg"); 570 if (Pos == llvm::StringRef::npos) 571 return llvm::createStringError(llvm::inconvertibleErrorCode(), 572 "Can't find <svg> tag in `dot` output"); 573 return Result.substr(Pos).str(); 574 } 575 576 } // namespace 577 578 std::unique_ptr<Logger> 579 Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { 580 return std::make_unique<HTMLLogger>(std::move(Streams)); 581 } 582 583 } // namespace clang::dataflow 584