1 //===-- HTMLLogger.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the HTML logger. Given a directory dir/, we write 10 // dir/0.html for the first analysis, etc. 11 // These files contain a visualization that allows inspecting the CFG and the 12 // state of the analysis at each point. 13 // Static assets (HTMLLogger.js, HTMLLogger.css) and SVG graphs etc are embedded 14 // so each output file is self-contained. 15 // 16 // VIEWS 17 // 18 // The timeline and function view are always shown. These allow selecting basic 19 // blocks, statements within them, and processing iterations (BBs are visited 20 // multiple times when e.g. loops are involved). 21 // These are written directly into the HTML body. 22 // 23 // There are also listings of particular basic blocks, and dumps of the state 24 // at particular analysis points (i.e. BB2 iteration 3 statement 2). 25 // These are only shown when the relevant BB/analysis point is *selected*. 26 // 27 // DATA AND TEMPLATES 28 // 29 // The HTML proper is mostly static. 30 // The analysis data is in a JSON object HTMLLoggerData which is embedded as 31 // a <script> in the <head>. 32 // This gets rendered into DOM by a simple template processor which substitutes 33 // the data into <template> tags embedded in the HTML. (see inflate() in JS). 34 // 35 // SELECTION 36 // 37 // This is the only real interactive mechanism. 
//
// At any given time, there are several named selections, e.g.:
//   bb: B2               (basic block 2 is selected)
//   elt: B2.4            (statement 4 is selected)
//   iter: B2:1           (iteration 1 of the basic block is selected)
//   hover: B3            (hovering over basic block 3)
//
// The selection is updated by mouse events: hover by moving the mouse and
// others by clicking. Elements that are click targets generally have attributes
// (id or data-foo) that define what they should select.
// See watchSelection() in JS for the exact logic.
//
// When the "bb" selection is set to "B2":
//   - sections <section data-selection="bb"> get shown
//   - templates under such sections get re-rendered
//   - elements with class/id "B2" get class "bb-select"
//
//===----------------------------------------------------------------------===//

#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
#include "clang/Analysis/FlowSensitive/DebugSupport.h"
#include "clang/Analysis/FlowSensitive/Logger.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Lex/Lexer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
// Defines assets: HTMLLogger_{html,js,css}
#include "HTMLLogger.inc"

namespace clang::dataflow {
namespace {

// Render a graphviz graph specification to SVG using the `dot` tool.
79 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph); 80 81 using StreamFactory = std::function<std::unique_ptr<llvm::raw_ostream>()>; 82 83 // Recursively dumps Values/StorageLocations as JSON 84 class ModelDumper { 85 public: 86 ModelDumper(llvm::json::OStream &JOS, const Environment &Env) 87 : JOS(JOS), Env(Env) {} 88 89 void dump(Value &V) { 90 JOS.attribute("value_id", llvm::to_string(&V)); 91 if (!Visited.insert(&V).second) 92 return; 93 94 JOS.attribute("kind", debugString(V.getKind())); 95 96 switch (V.getKind()) { 97 case Value::Kind::Integer: 98 case Value::Kind::TopBool: 99 case Value::Kind::AtomicBool: 100 case Value::Kind::FormulaBool: 101 break; 102 case Value::Kind::Reference: 103 JOS.attributeObject( 104 "referent", [&] { dump(cast<ReferenceValue>(V).getReferentLoc()); }); 105 break; 106 case Value::Kind::Pointer: 107 JOS.attributeObject( 108 "pointee", [&] { dump(cast<PointerValue>(V).getPointeeLoc()); }); 109 break; 110 case Value::Kind::Struct: 111 for (const auto &Child : cast<StructValue>(V).children()) 112 JOS.attributeObject("f:" + Child.first->getNameAsString(), 113 [&] { dump(*Child.second); }); 114 break; 115 } 116 117 for (const auto& Prop : V.properties()) 118 JOS.attributeObject(("p:" + Prop.first()).str(), 119 [&] { dump(*Prop.second); }); 120 121 // Running the SAT solver is expensive, but knowing which booleans are 122 // guaranteed true/false here is valuable and hard to determine by hand. 123 if (auto *B = llvm::dyn_cast<BoolValue>(&V)) { 124 JOS.attribute("formula", llvm::to_string(B->formula())); 125 JOS.attribute( 126 "truth", Env.flowConditionImplies(B->formula()) ? "true" 127 : Env.flowConditionImplies(Env.arena().makeNot(B->formula())) 128 ? 
"false" 129 : "unknown"); 130 } 131 } 132 void dump(const StorageLocation &L) { 133 JOS.attribute("location", llvm::to_string(&L)); 134 if (!Visited.insert(&L).second) 135 return; 136 137 JOS.attribute("type", L.getType().getAsString()); 138 if (auto *V = Env.getValue(L)) 139 dump(*V); 140 } 141 142 llvm::DenseSet<const void*> Visited; 143 llvm::json::OStream &JOS; 144 const Environment &Env; 145 }; 146 147 class HTMLLogger : public Logger { 148 StreamFactory Streams; 149 std::unique_ptr<llvm::raw_ostream> OS; 150 std::optional<llvm::json::OStream> JOS; 151 152 const ControlFlowContext *CFG; 153 // Timeline of iterations of CFG block visitation. 154 std::vector<std::pair<const CFGBlock *, unsigned>> Iters; 155 // Number of times each CFG block has been seen. 156 llvm::DenseMap<const CFGBlock *, unsigned> BlockIters; 157 // The messages logged in the current context but not yet written. 158 std::string ContextLogs; 159 // The number of elements we have visited within the current CFG block. 
160 unsigned ElementIndex; 161 162 public: 163 explicit HTMLLogger(StreamFactory Streams) : Streams(std::move(Streams)) {} 164 void beginAnalysis(const ControlFlowContext &CFG, 165 TypeErasedDataflowAnalysis &A) override { 166 OS = Streams(); 167 this->CFG = &CFG; 168 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").first; 169 170 if (const auto *D = CFG.getDecl()) { 171 const auto &SM = A.getASTContext().getSourceManager(); 172 *OS << "<title>"; 173 if (const auto *ND = dyn_cast<NamedDecl>(D)) 174 *OS << ND->getNameAsString() << " at "; 175 *OS << SM.getFilename(D->getLocation()) << ":" 176 << SM.getSpellingLineNumber(D->getLocation()); 177 *OS << "</title>\n"; 178 }; 179 180 *OS << "<style>" << HTMLLogger_css << "</style>\n"; 181 *OS << "<script>" << HTMLLogger_js << "</script>\n"; 182 183 writeCode(); 184 writeCFG(); 185 186 *OS << "<script>var HTMLLoggerData = \n"; 187 JOS.emplace(*OS, /*Indent=*/2); 188 JOS->objectBegin(); 189 JOS->attributeBegin("states"); 190 JOS->objectBegin(); 191 } 192 // Between beginAnalysis() and endAnalysis() we write all the states for 193 // particular analysis points into the `timeline` array. 
194 void endAnalysis() override { 195 JOS->objectEnd(); 196 JOS->attributeEnd(); 197 198 JOS->attributeArray("timeline", [&] { 199 for (const auto &E : Iters) { 200 JOS->object([&] { 201 JOS->attribute("block", blockID(E.first->getBlockID())); 202 JOS->attribute("iter", E.second); 203 }); 204 } 205 }); 206 JOS->attributeObject("cfg", [&] { 207 for (const auto &E : BlockIters) 208 writeBlock(*E.first, E.second); 209 }); 210 211 JOS->objectEnd(); 212 JOS.reset(); 213 *OS << ";\n</script>\n"; 214 *OS << llvm::StringRef(HTMLLogger_html).split("<?INJECT?>").second; 215 } 216 217 void enterBlock(const CFGBlock &B) override { 218 Iters.emplace_back(&B, ++BlockIters[&B]); 219 ElementIndex = 0; 220 } 221 void enterElement(const CFGElement &E) override { 222 ++ElementIndex; 223 } 224 225 static std::string blockID(unsigned Block) { 226 return llvm::formatv("B{0}", Block); 227 } 228 static std::string eltID(unsigned Block, unsigned Element) { 229 return llvm::formatv("B{0}.{1}", Block, Element); 230 } 231 static std::string iterID(unsigned Block, unsigned Iter) { 232 return llvm::formatv("B{0}:{1}", Block, Iter); 233 } 234 static std::string elementIterID(unsigned Block, unsigned Iter, 235 unsigned Element) { 236 return llvm::formatv("B{0}:{1}_B{0}.{2}", Block, Iter, Element); 237 } 238 239 // Write the analysis state associated with a particular analysis point. 240 // FIXME: this dump is fairly opaque. 
We should show: 241 // - values associated with the current Stmt 242 // - values associated with its children 243 // - meaningful names for values 244 // - which boolean values are implied true/false by the flow condition 245 void recordState(TypeErasedDataflowAnalysisState &State) override { 246 unsigned Block = Iters.back().first->getBlockID(); 247 unsigned Iter = Iters.back().second; 248 JOS->attributeObject(elementIterID(Block, Iter, ElementIndex), [&] { 249 JOS->attribute("block", blockID(Block)); 250 JOS->attribute("iter", Iter); 251 JOS->attribute("element", ElementIndex); 252 253 // If this state immediately follows an Expr, show its built-in model. 254 if (ElementIndex > 0) { 255 auto S = 256 Iters.back().first->Elements[ElementIndex - 1].getAs<CFGStmt>(); 257 if (const Expr *E = S ? llvm::dyn_cast<Expr>(S->getStmt()) : nullptr) 258 if (auto *Loc = State.Env.getStorageLocation(*E, SkipPast::None)) 259 JOS->attributeObject( 260 "value", [&] { ModelDumper(*JOS, State.Env).dump(*Loc); }); 261 } 262 if (!ContextLogs.empty()) { 263 JOS->attribute("logs", ContextLogs); 264 ContextLogs.clear(); 265 } 266 { 267 std::string BuiltinLattice; 268 llvm::raw_string_ostream BuiltinLatticeS(BuiltinLattice); 269 State.Env.dump(BuiltinLatticeS); 270 JOS->attribute("builtinLattice", BuiltinLattice); 271 } 272 }); 273 } 274 void blockConverged() override { logText("Block converged"); } 275 276 void logText(llvm::StringRef S) override { 277 ContextLogs.append(S.begin(), S.end()); 278 ContextLogs.push_back('\n'); 279 } 280 281 private: 282 // Write the CFG block details. 283 // Currently this is just the list of elements in execution order. 284 // FIXME: an AST dump would be a useful view, too. 
285 void writeBlock(const CFGBlock &B, unsigned Iters) { 286 JOS->attributeObject(blockID(B.getBlockID()), [&] { 287 JOS->attribute("iters", Iters); 288 JOS->attributeArray("elements", [&] { 289 for (const auto &Elt : B.Elements) { 290 std::string Dump; 291 llvm::raw_string_ostream DumpS(Dump); 292 Elt.dumpToStream(DumpS); 293 JOS->value(Dump); 294 } 295 }); 296 }); 297 } 298 299 // Write the code of function being examined. 300 // We want to overlay the code with <span>s that mark which BB particular 301 // tokens are associated with, and even which BB element (so that clicking 302 // can select the right element). 303 void writeCode() { 304 if (!CFG->getDecl()) 305 return; 306 const auto &AST = CFG->getDecl()->getASTContext(); 307 bool Invalid = false; 308 309 // Extract the source code from the original file. 310 // Pretty-printing from the AST would probably be nicer (no macros or 311 // indentation to worry about), but we need the boundaries of particular 312 // AST nodes and the printer doesn't provide this. 313 auto Range = clang::Lexer::makeFileCharRange( 314 CharSourceRange::getTokenRange(CFG->getDecl()->getSourceRange()), 315 AST.getSourceManager(), AST.getLangOpts()); 316 if (Range.isInvalid()) 317 return; 318 llvm::StringRef Code = clang::Lexer::getSourceText( 319 Range, AST.getSourceManager(), AST.getLangOpts(), &Invalid); 320 if (Invalid) 321 return; 322 323 static constexpr unsigned Missing = -1; 324 // TokenInfo stores the BB and set of elements that a token is part of. 325 struct TokenInfo { 326 // The basic block this is part of. 327 // This is the BB of the stmt with the smallest containing range. 328 unsigned BB = Missing; 329 unsigned BBPriority = 0; 330 // The most specific stmt this is part of (smallest range). 331 unsigned Elt = Missing; 332 unsigned EltPriority = 0; 333 // All stmts this is part of. 334 SmallVector<unsigned> Elts; 335 336 // Mark this token as being part of BB.Elt. 
337 // RangeLen is the character length of the element's range, used to 338 // distinguish inner vs outer statements. 339 // For example in `a==0`, token "a" is part of the stmts "a" and "a==0". 340 // However "a" has a smaller range, so is more specific. Clicking on the 341 // token "a" should select the stmt "a". 342 void assign(unsigned BB, unsigned Elt, unsigned RangeLen) { 343 // A worse BB (larger range) => ignore. 344 if (this->BB != Missing && BB != this->BB && BBPriority <= RangeLen) 345 return; 346 if (BB != this->BB) { 347 this->BB = BB; 348 Elts.clear(); 349 BBPriority = RangeLen; 350 } 351 BBPriority = std::min(BBPriority, RangeLen); 352 Elts.push_back(Elt); 353 if (this->Elt == Missing || EltPriority > RangeLen) 354 this->Elt = Elt; 355 } 356 bool operator==(const TokenInfo &Other) const { 357 return std::tie(BB, Elt, Elts) == 358 std::tie(Other.BB, Other.Elt, Other.Elts); 359 } 360 // Write the attributes for the <span> on this token. 361 void write(llvm::raw_ostream &OS) const { 362 OS << "class='c"; 363 if (BB != Missing) 364 OS << " " << blockID(BB); 365 for (unsigned Elt : Elts) 366 OS << " " << eltID(BB, Elt); 367 OS << "'"; 368 369 if (Elt != Missing) 370 OS << " data-elt='" << eltID(BB, Elt) << "'"; 371 if (BB != Missing) 372 OS << " data-bb='" << blockID(BB) << "'"; 373 } 374 }; 375 376 // Construct one TokenInfo per character in a flat array. 377 // This is inefficient (chars in a token all have the same info) but simple. 
378 std::vector<TokenInfo> State(Code.size()); 379 for (const auto *Block : CFG->getCFG()) { 380 unsigned EltIndex = 0; 381 for (const auto& Elt : *Block) { 382 ++EltIndex; 383 if (const auto S = Elt.getAs<CFGStmt>()) { 384 auto EltRange = clang::Lexer::makeFileCharRange( 385 CharSourceRange::getTokenRange(S->getStmt()->getSourceRange()), 386 AST.getSourceManager(), AST.getLangOpts()); 387 if (EltRange.isInvalid()) 388 continue; 389 if (EltRange.getBegin() < Range.getBegin() || 390 EltRange.getEnd() >= Range.getEnd() || 391 EltRange.getEnd() < Range.getBegin() || 392 EltRange.getEnd() >= Range.getEnd()) 393 continue; 394 unsigned Off = EltRange.getBegin().getRawEncoding() - 395 Range.getBegin().getRawEncoding(); 396 unsigned Len = EltRange.getEnd().getRawEncoding() - 397 EltRange.getBegin().getRawEncoding(); 398 for (unsigned I = 0; I < Len; ++I) 399 State[Off + I].assign(Block->getBlockID(), EltIndex, Len); 400 } 401 } 402 } 403 404 // Finally, write the code with the correct <span>s. 405 unsigned Line = 406 AST.getSourceManager().getSpellingLineNumber(Range.getBegin()); 407 *OS << "<template data-copy='code'>\n"; 408 *OS << "<code class='filename'>"; 409 llvm::printHTMLEscaped( 410 llvm::sys::path::filename( 411 AST.getSourceManager().getFilename(Range.getBegin())), 412 *OS); 413 *OS << "</code>"; 414 *OS << "<code class='line' data-line='" << Line++ << "'>"; 415 for (unsigned I = 0; I < Code.size(); ++I) { 416 // Don't actually write a <span> around each character, only break spans 417 // when the TokenInfo changes. 
418 bool NeedOpen = I == 0 || !(State[I] == State[I-1]); 419 bool NeedClose = I + 1 == Code.size() || !(State[I] == State[I + 1]); 420 if (NeedOpen) { 421 *OS << "<span "; 422 State[I].write(*OS); 423 *OS << ">"; 424 } 425 if (Code[I] == '\n') 426 *OS << "</code>\n<code class='line' data-line='" << Line++ << "'>"; 427 else 428 llvm::printHTMLEscaped(Code.substr(I, 1), *OS); 429 if (NeedClose) *OS << "</span>"; 430 } 431 *OS << "</code>\n"; 432 *OS << "</template>"; 433 } 434 435 // Write the CFG diagram, a graph of basic blocks. 436 // Laying out graphs is hard, so we construct a graphviz description and shell 437 // out to `dot` to turn it into an SVG. 438 void writeCFG() { 439 *OS << "<template data-copy='cfg'>\n"; 440 if (auto SVG = renderSVG(buildCFGDot(CFG->getCFG()))) 441 *OS << *SVG; 442 else 443 *OS << "Can't draw CFG: " << toString(SVG.takeError()); 444 *OS << "</template>\n"; 445 } 446 447 // Produce a graphviz description of a CFG. 448 static std::string buildCFGDot(const clang::CFG &CFG) { 449 std::string Graph; 450 llvm::raw_string_ostream GraphS(Graph); 451 // Graphviz likes to add unhelpful tooltips everywhere, " " suppresses. 452 GraphS << R"(digraph { 453 tooltip=" " 454 node[class=bb, shape=square, fontname="sans-serif", tooltip=" "] 455 edge[tooltip = " "] 456 )"; 457 for (unsigned I = 0; I < CFG.getNumBlockIDs(); ++I) 458 GraphS << " " << blockID(I) << " [id=" << blockID(I) << "]\n"; 459 for (const auto *Block : CFG) { 460 for (const auto &Succ : Block->succs()) { 461 GraphS << " " << blockID(Block->getBlockID()) << " -> " 462 << blockID(Succ.getReachableBlock()->getBlockID()) << "\n"; 463 } 464 } 465 GraphS << "}\n"; 466 return Graph; 467 } 468 }; 469 470 // Nothing interesting here, just subprocess/temp-file plumbing. 
471 llvm::Expected<std::string> renderSVG(llvm::StringRef DotGraph) { 472 std::string DotPath; 473 if (const auto *FromEnv = ::getenv("GRAPHVIZ_DOT")) 474 DotPath = FromEnv; 475 else { 476 auto FromPath = llvm::sys::findProgramByName("dot"); 477 if (!FromPath) 478 return llvm::createStringError(FromPath.getError(), 479 "'dot' not found on PATH"); 480 DotPath = FromPath.get(); 481 } 482 483 // Create input and output files for `dot` subprocess. 484 // (We create the output file as empty, to reserve the temp filename). 485 llvm::SmallString<256> Input, Output; 486 int InputFD; 487 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".dot", InputFD, 488 Input)) 489 return llvm::createStringError(EC, "failed to create `dot` temp input"); 490 llvm::raw_fd_ostream(InputFD, /*shouldClose=*/true) << DotGraph; 491 auto DeleteInput = 492 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Input); }); 493 if (auto EC = llvm::sys::fs::createTemporaryFile("analysis", ".svg", Output)) 494 return llvm::createStringError(EC, "failed to create `dot` temp output"); 495 auto DeleteOutput = 496 llvm::make_scope_exit([&] { llvm::sys::fs::remove(Output); }); 497 498 std::vector<std::optional<llvm::StringRef>> Redirects = { 499 Input, Output, 500 /*stderr=*/std::nullopt}; 501 std::string ErrMsg; 502 int Code = llvm::sys::ExecuteAndWait( 503 DotPath, {"dot", "-Tsvg"}, /*Env=*/std::nullopt, Redirects, 504 /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg); 505 if (!ErrMsg.empty()) 506 return llvm::createStringError(llvm::inconvertibleErrorCode(), 507 "'dot' failed: " + ErrMsg); 508 if (Code != 0) 509 return llvm::createStringError(llvm::inconvertibleErrorCode(), 510 "'dot' failed (" + llvm::Twine(Code) + ")"); 511 512 auto Buf = llvm::MemoryBuffer::getFile(Output); 513 if (!Buf) 514 return llvm::createStringError(Buf.getError(), "Can't read `dot` output"); 515 516 // Output has <?xml> prefix we don't want. Skip to <svg> tag. 
517 llvm::StringRef Result = Buf.get()->getBuffer(); 518 auto Pos = Result.find("<svg"); 519 if (Pos == llvm::StringRef::npos) 520 return llvm::createStringError(llvm::inconvertibleErrorCode(), 521 "Can't find <svg> tag in `dot` output"); 522 return Result.substr(Pos).str(); 523 } 524 525 } // namespace 526 527 std::unique_ptr<Logger> 528 Logger::html(std::function<std::unique_ptr<llvm::raw_ostream>()> Streams) { 529 return std::make_unique<HTMLLogger>(std::move(Streams)); 530 } 531 532 } // namespace clang::dataflow 533