1 //== HTMLRewrite.cpp - Translate source code into prettified HTML --*- C++ -*-// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the HTMLRewriter class, which is used to translate the 11 // text of a source file into prettified HTML. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Rewrite/Core/HTMLRewrite.h" 16 #include "clang/Basic/SourceManager.h" 17 #include "clang/Lex/Preprocessor.h" 18 #include "clang/Lex/TokenConcatenation.h" 19 #include "clang/Rewrite/Core/Rewriter.h" 20 #include "llvm/ADT/SmallString.h" 21 #include "llvm/Support/ErrorHandling.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include "llvm/Support/raw_ostream.h" 24 #include <memory> 25 using namespace clang; 26 27 28 /// HighlightRange - Highlight a range in the source code with the specified 29 /// start/end tags. B/E must be in the same file. This ensures that 30 /// start/end tags are placed at the start/end of each line if the range is 31 /// multiline. 32 void html::HighlightRange(Rewriter &R, SourceLocation B, SourceLocation E, 33 const char *StartTag, const char *EndTag, 34 bool IsTokenRange) { 35 SourceManager &SM = R.getSourceMgr(); 36 B = SM.getExpansionLoc(B); 37 E = SM.getExpansionLoc(E); 38 FileID FID = SM.getFileID(B); 39 assert(SM.getFileID(E) == FID && "B/E not in the same file!"); 40 41 unsigned BOffset = SM.getFileOffset(B); 42 unsigned EOffset = SM.getFileOffset(E); 43 44 // Include the whole end token in the range. 45 if (IsTokenRange) 46 EOffset += Lexer::MeasureTokenLength(E, R.getSourceMgr(), R.getLangOpts()); 47 48 bool Invalid = false; 49 const char *BufferStart = SM.getBufferData(FID, &Invalid).data(); 50 if (Invalid) 51 return; 52 53 HighlightRange(R.getEditBuffer(FID), BOffset, EOffset, 54 BufferStart, StartTag, EndTag); 55 } 56 57 /// HighlightRange - This is the same as the above method, but takes 58 /// decomposed file locations. 59 void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E, 60 const char *BufferStart, 61 const char *StartTag, const char *EndTag) { 62 // Insert the tag at the absolute start/end of the range. 63 RB.InsertTextAfter(B, StartTag); 64 RB.InsertTextBefore(E, EndTag); 65 66 // Scan the range to see if there is a \r or \n. If so, and if the line is 67 // not blank, insert tags on that line as well. 68 bool HadOpenTag = true; 69 70 unsigned LastNonWhiteSpace = B; 71 for (unsigned i = B; i != E; ++i) { 72 switch (BufferStart[i]) { 73 case '\r': 74 case '\n': 75 // Okay, we found a newline in the range. If we have an open tag, we need 76 // to insert a close tag at the first non-whitespace before the newline. 77 if (HadOpenTag) 78 RB.InsertTextBefore(LastNonWhiteSpace+1, EndTag); 79 80 // Instead of inserting an open tag immediately after the newline, we 81 // wait until we see a non-whitespace character. This prevents us from 82 // inserting tags around blank lines, and also allows the open tag to 83 // be put *after* whitespace on a non-blank line. 84 HadOpenTag = false; 85 break; 86 case '\0': 87 case ' ': 88 case '\t': 89 case '\f': 90 case '\v': 91 // Ignore whitespace. 92 break; 93 94 default: 95 // If there is no tag open, do it now. 96 if (!HadOpenTag) { 97 RB.InsertTextAfter(i, StartTag); 98 HadOpenTag = true; 99 } 100 101 // Remember this character. 102 LastNonWhiteSpace = i; 103 break; 104 } 105 } 106 } 107 108 void html::EscapeText(Rewriter &R, FileID FID, 109 bool EscapeSpaces, bool ReplaceTabs) { 110 111 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID); 112 const char* C = Buf->getBufferStart(); 113 const char* FileEnd = Buf->getBufferEnd(); 114 115 assert (C <= FileEnd); 116 117 RewriteBuffer &RB = R.getEditBuffer(FID); 118 119 unsigned ColNo = 0; 120 for (unsigned FilePos = 0; C != FileEnd ; ++C, ++FilePos) { 121 switch (*C) { 122 default: ++ColNo; break; 123 case '\n': 124 case '\r': 125 ColNo = 0; 126 break; 127 128 case ' ': 129 if (EscapeSpaces) 130 RB.ReplaceText(FilePos, 1, " "); 131 ++ColNo; 132 break; 133 case '\f': 134 RB.ReplaceText(FilePos, 1, "<hr>"); 135 ColNo = 0; 136 break; 137 138 case '\t': { 139 if (!ReplaceTabs) 140 break; 141 unsigned NumSpaces = 8-(ColNo&7); 142 if (EscapeSpaces) 143 RB.ReplaceText(FilePos, 1, 144 StringRef(" " 145 " ", 6*NumSpaces)); 146 else 147 RB.ReplaceText(FilePos, 1, StringRef(" ", NumSpaces)); 148 ColNo += NumSpaces; 149 break; 150 } 151 case '<': 152 RB.ReplaceText(FilePos, 1, "<"); 153 ++ColNo; 154 break; 155 156 case '>': 157 RB.ReplaceText(FilePos, 1, ">"); 158 ++ColNo; 159 break; 160 161 case '&': 162 RB.ReplaceText(FilePos, 1, "&"); 163 ++ColNo; 164 break; 165 } 166 } 167 } 168 169 std::string html::EscapeText(StringRef s, bool EscapeSpaces, bool ReplaceTabs) { 170 171 unsigned len = s.size(); 172 std::string Str; 173 llvm::raw_string_ostream os(Str); 174 175 for (unsigned i = 0 ; i < len; ++i) { 176 177 char c = s[i]; 178 switch (c) { 179 default: 180 os << c; break; 181 182 case ' ': 183 if (EscapeSpaces) os << " "; 184 else os << ' '; 185 break; 186 187 case '\t': 188 if (ReplaceTabs) { 189 if (EscapeSpaces) 190 for (unsigned i = 0; i < 4; ++i) 191 os << " "; 192 else 193 for (unsigned i = 0; i < 4; ++i) 194 os << " "; 195 } 196 else 197 os << c; 198 199 break; 200 201 case '<': os << "<"; break; 202 case '>': os << ">"; break; 203 case '&': os << "&"; break; 204 } 205 } 206 207 return os.str(); 208 } 209 210 static void AddLineNumber(RewriteBuffer &RB, unsigned LineNo, 211 unsigned B, unsigned E) { 212 SmallString<256> Str; 213 llvm::raw_svector_ostream OS(Str); 214 215 OS << "<tr class=\"codeline\" data-linenumber=\"" << LineNo << "\">" 216 << "<td class=\"num\" id=\"LN" << LineNo << "\">" << LineNo 217 << "</td><td class=\"line\">"; 218 219 if (B == E) { // Handle empty lines. 220 OS << " </td></tr>"; 221 RB.InsertTextBefore(B, OS.str()); 222 } else { 223 RB.InsertTextBefore(B, OS.str()); 224 RB.InsertTextBefore(E, "</td></tr>"); 225 } 226 } 227 228 void html::AddLineNumbers(Rewriter& R, FileID FID) { 229 230 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID); 231 const char* FileBeg = Buf->getBufferStart(); 232 const char* FileEnd = Buf->getBufferEnd(); 233 const char* C = FileBeg; 234 RewriteBuffer &RB = R.getEditBuffer(FID); 235 236 assert (C <= FileEnd); 237 238 unsigned LineNo = 0; 239 unsigned FilePos = 0; 240 241 while (C != FileEnd) { 242 243 ++LineNo; 244 unsigned LineStartPos = FilePos; 245 unsigned LineEndPos = FileEnd - FileBeg; 246 247 assert (FilePos <= LineEndPos); 248 assert (C < FileEnd); 249 250 // Scan until the newline (or end-of-file). 251 252 while (C != FileEnd) { 253 char c = *C; 254 ++C; 255 256 if (c == '\n') { 257 LineEndPos = FilePos++; 258 break; 259 } 260 261 ++FilePos; 262 } 263 264 AddLineNumber(RB, LineNo, LineStartPos, LineEndPos); 265 } 266 267 // Add one big table tag that surrounds all of the code. 268 std::string s; 269 llvm::raw_string_ostream os(s); 270 os << "<table class=\"code\" data-fileid=\"" << FID.getHashValue() << "\">\n"; 271 RB.InsertTextBefore(0, os.str()); 272 RB.InsertTextAfter(FileEnd - FileBeg, "</table>"); 273 } 274 275 void html::AddHeaderFooterInternalBuiltinCSS(Rewriter &R, FileID FID, 276 StringRef title) { 277 278 const llvm::MemoryBuffer *Buf = R.getSourceMgr().getBuffer(FID); 279 const char* FileStart = Buf->getBufferStart(); 280 const char* FileEnd = Buf->getBufferEnd(); 281 282 SourceLocation StartLoc = R.getSourceMgr().getLocForStartOfFile(FID); 283 SourceLocation EndLoc = StartLoc.getLocWithOffset(FileEnd-FileStart); 284 285 std::string s; 286 llvm::raw_string_ostream os(s); 287 os << "<!doctype html>\n" // Use HTML 5 doctype 288 "<html>\n<head>\n"; 289 290 if (!title.empty()) 291 os << "<title>" << html::EscapeText(title) << "</title>\n"; 292 293 os << R"<<<( 294 <style type="text/css"> 295 body { color:#000000; background-color:#ffffff } 296 body { font-family:Helvetica, sans-serif; font-size:10pt } 297 h1 { font-size:14pt } 298 .FileName { margin-top: 5px; margin-bottom: 5px; display: inline; } 299 .FileNav { margin-left: 5px; margin-right: 5px; display: inline; } 300 .FileNav a { text-decoration:none; font-size: larger; } 301 .divider { margin-top: 30px; margin-bottom: 30px; height: 15px; } 302 .divider { background-color: gray; } 303 .code { border-collapse:collapse; width:100%; } 304 .code { font-family: "Monospace", monospace; font-size:10pt } 305 .code { line-height: 1.2em } 306 .comment { color: green; font-style: oblique } 307 .keyword { color: blue } 308 .string_literal { color: red } 309 .directive { color: darkmagenta } 310 /* Macro expansions. */ 311 .expansion { display: none; } 312 .macro:hover .expansion { 313 display: block; 314 border: 2px solid #FF0000; 315 padding: 2px; 316 background-color:#FFF0F0; 317 font-weight: normal; 318 -webkit-border-radius:5px; 319 -webkit-box-shadow:1px 1px 7px #000; 320 border-radius:5px; 321 box-shadow:1px 1px 7px #000; 322 position: absolute; 323 top: -1em; 324 left:10em; 325 z-index: 1 326 } 327 328 #tooltiphint { 329 position: fixed; 330 width: 50em; 331 margin-left: -25em; 332 left: 50%; 333 padding: 10px; 334 border: 1px solid #b0b0b0; 335 border-radius: 2px; 336 box-shadow: 1px 1px 7px black; 337 background-color: #c0c0c0; 338 z-index: 2; 339 } 340 .macro { 341 color: darkmagenta; 342 background-color:LemonChiffon; 343 /* Macros are position: relative to provide base for expansions. */ 344 position: relative; 345 } 346 347 .num { width:2.5em; padding-right:2ex; background-color:#eeeeee } 348 .num { text-align:right; font-size:8pt } 349 .num { color:#444444 } 350 .line { padding-left: 1ex; border-left: 3px solid #ccc } 351 .line { white-space: pre } 352 .msg { -webkit-box-shadow:1px 1px 7px #000 } 353 .msg { box-shadow:1px 1px 7px #000 } 354 .msg { -webkit-border-radius:5px } 355 .msg { border-radius:5px } 356 .msg { font-family:Helvetica, sans-serif; font-size:8pt } 357 .msg { float:left } 358 .msg { padding:0.25em 1ex 0.25em 1ex } 359 .msg { margin-top:10px; margin-bottom:10px } 360 .msg { font-weight:bold } 361 .msg { max-width:60em; word-wrap: break-word; white-space: pre-wrap } 362 .msgT { padding:0x; spacing:0x } 363 .msgEvent { background-color:#fff8b4; color:#000000 } 364 .msgControl { background-color:#bbbbbb; color:#000000 } 365 .msgNote { background-color:#ddeeff; color:#000000 } 366 .mrange { background-color:#dfddf3 } 367 .mrange { border-bottom:1px solid #6F9DBE } 368 .PathIndex { font-weight: bold; padding:0px 5px; margin-right:5px; } 369 .PathIndex { -webkit-border-radius:8px } 370 .PathIndex { border-radius:8px } 371 .PathIndexEvent { background-color:#bfba87 } 372 .PathIndexControl { background-color:#8c8c8c } 373 .PathNav a { text-decoration:none; font-size: larger } 374 .CodeInsertionHint { font-weight: bold; background-color: #10dd10 } 375 .CodeRemovalHint { background-color:#de1010 } 376 .CodeRemovalHint { border-bottom:1px solid #6F9DBE } 377 .selected{ background-color:orange !important; } 378 379 table.simpletable { 380 padding: 5px; 381 font-size:12pt; 382 margin:20px; 383 border-collapse: collapse; border-spacing: 0px; 384 } 385 td.rowname { 386 text-align: right; 387 vertical-align: top; 388 font-weight: bold; 389 color:#444444; 390 padding-right:2ex; 391 } 392 393 /* Hidden text. */ 394 input.spoilerhider + label { 395 cursor: pointer; 396 text-decoration: underline; 397 display: block; 398 } 399 input.spoilerhider { 400 display: none; 401 } 402 input.spoilerhider ~ .spoiler { 403 overflow: hidden; 404 margin: 10px auto 0; 405 height: 0; 406 opacity: 0; 407 } 408 input.spoilerhider:checked + label + .spoiler{ 409 height: auto; 410 opacity: 1; 411 } 412 </style> 413 </head> 414 <body>)<<<"; 415 416 // Generate header 417 R.InsertTextBefore(StartLoc, os.str()); 418 // Generate footer 419 420 R.InsertTextAfter(EndLoc, "</body></html>\n"); 421 } 422 423 /// SyntaxHighlight - Relex the specified FileID and annotate the HTML with 424 /// information about keywords, macro expansions etc. This uses the macro 425 /// table state from the end of the file, so it won't be perfectly perfect, 426 /// but it will be reasonably close. 427 void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) { 428 RewriteBuffer &RB = R.getEditBuffer(FID); 429 430 const SourceManager &SM = PP.getSourceManager(); 431 const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID); 432 Lexer L(FID, FromFile, SM, PP.getLangOpts()); 433 const char *BufferStart = L.getBuffer().data(); 434 435 // Inform the preprocessor that we want to retain comments as tokens, so we 436 // can highlight them. 437 L.SetCommentRetentionState(true); 438 439 // Lex all the tokens in raw mode, to avoid entering #includes or expanding 440 // macros. 441 Token Tok; 442 L.LexFromRawLexer(Tok); 443 444 while (Tok.isNot(tok::eof)) { 445 // Since we are lexing unexpanded tokens, all tokens are from the main 446 // FileID. 447 unsigned TokOffs = SM.getFileOffset(Tok.getLocation()); 448 unsigned TokLen = Tok.getLength(); 449 switch (Tok.getKind()) { 450 default: break; 451 case tok::identifier: 452 llvm_unreachable("tok::identifier in raw lexing mode!"); 453 case tok::raw_identifier: { 454 // Fill in Result.IdentifierInfo and update the token kind, 455 // looking up the identifier in the identifier table. 456 PP.LookUpIdentifierInfo(Tok); 457 458 // If this is a pp-identifier, for a keyword, highlight it as such. 459 if (Tok.isNot(tok::identifier)) 460 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, 461 "<span class='keyword'>", "</span>"); 462 break; 463 } 464 case tok::comment: 465 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, 466 "<span class='comment'>", "</span>"); 467 break; 468 case tok::utf8_string_literal: 469 // Chop off the u part of u8 prefix 470 ++TokOffs; 471 --TokLen; 472 // FALL THROUGH to chop the 8 473 LLVM_FALLTHROUGH; 474 case tok::wide_string_literal: 475 case tok::utf16_string_literal: 476 case tok::utf32_string_literal: 477 // Chop off the L, u, U or 8 prefix 478 ++TokOffs; 479 --TokLen; 480 // FALL THROUGH. 481 case tok::string_literal: 482 // FIXME: Exclude the optional ud-suffix from the highlighted range. 483 HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart, 484 "<span class='string_literal'>", "</span>"); 485 break; 486 case tok::hash: { 487 // If this is a preprocessor directive, all tokens to end of line are too. 488 if (!Tok.isAtStartOfLine()) 489 break; 490 491 // Eat all of the tokens until we get to the next one at the start of 492 // line. 493 unsigned TokEnd = TokOffs+TokLen; 494 L.LexFromRawLexer(Tok); 495 while (!Tok.isAtStartOfLine() && Tok.isNot(tok::eof)) { 496 TokEnd = SM.getFileOffset(Tok.getLocation())+Tok.getLength(); 497 L.LexFromRawLexer(Tok); 498 } 499 500 // Find end of line. This is a hack. 501 HighlightRange(RB, TokOffs, TokEnd, BufferStart, 502 "<span class='directive'>", "</span>"); 503 504 // Don't skip the next token. 505 continue; 506 } 507 } 508 509 L.LexFromRawLexer(Tok); 510 } 511 } 512 513 /// HighlightMacros - This uses the macro table state from the end of the 514 /// file, to re-expand macros and insert (into the HTML) information about the 515 /// macro expansions. This won't be perfectly perfect, but it will be 516 /// reasonably close. 517 void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) { 518 // Re-lex the raw token stream into a token buffer. 519 const SourceManager &SM = PP.getSourceManager(); 520 std::vector<Token> TokenStream; 521 522 const llvm::MemoryBuffer *FromFile = SM.getBuffer(FID); 523 Lexer L(FID, FromFile, SM, PP.getLangOpts()); 524 525 // Lex all the tokens in raw mode, to avoid entering #includes or expanding 526 // macros. 527 while (1) { 528 Token Tok; 529 L.LexFromRawLexer(Tok); 530 531 // If this is a # at the start of a line, discard it from the token stream. 532 // We don't want the re-preprocess step to see #defines, #includes or other 533 // preprocessor directives. 534 if (Tok.is(tok::hash) && Tok.isAtStartOfLine()) 535 continue; 536 537 // If this is a ## token, change its kind to unknown so that repreprocessing 538 // it will not produce an error. 539 if (Tok.is(tok::hashhash)) 540 Tok.setKind(tok::unknown); 541 542 // If this raw token is an identifier, the raw lexer won't have looked up 543 // the corresponding identifier info for it. Do this now so that it will be 544 // macro expanded when we re-preprocess it. 545 if (Tok.is(tok::raw_identifier)) 546 PP.LookUpIdentifierInfo(Tok); 547 548 TokenStream.push_back(Tok); 549 550 if (Tok.is(tok::eof)) break; 551 } 552 553 // Temporarily change the diagnostics object so that we ignore any generated 554 // diagnostics from this pass. 555 DiagnosticsEngine TmpDiags(PP.getDiagnostics().getDiagnosticIDs(), 556 &PP.getDiagnostics().getDiagnosticOptions(), 557 new IgnoringDiagConsumer); 558 559 // FIXME: This is a huge hack; we reuse the input preprocessor because we want 560 // its state, but we aren't actually changing it (we hope). This should really 561 // construct a copy of the preprocessor. 562 Preprocessor &TmpPP = const_cast<Preprocessor&>(PP); 563 DiagnosticsEngine *OldDiags = &TmpPP.getDiagnostics(); 564 TmpPP.setDiagnostics(TmpDiags); 565 566 // Inform the preprocessor that we don't want comments. 567 TmpPP.SetCommentRetentionState(false, false); 568 569 // We don't want pragmas either. Although we filtered out #pragma, removing 570 // _Pragma and __pragma is much harder. 571 bool PragmasPreviouslyEnabled = TmpPP.getPragmasEnabled(); 572 TmpPP.setPragmasEnabled(false); 573 574 // Enter the tokens we just lexed. This will cause them to be macro expanded 575 // but won't enter sub-files (because we removed #'s). 576 TmpPP.EnterTokenStream(TokenStream, false); 577 578 TokenConcatenation ConcatInfo(TmpPP); 579 580 // Lex all the tokens. 581 Token Tok; 582 TmpPP.Lex(Tok); 583 while (Tok.isNot(tok::eof)) { 584 // Ignore non-macro tokens. 585 if (!Tok.getLocation().isMacroID()) { 586 TmpPP.Lex(Tok); 587 continue; 588 } 589 590 // Okay, we have the first token of a macro expansion: highlight the 591 // expansion by inserting a start tag before the macro expansion and 592 // end tag after it. 593 CharSourceRange LLoc = SM.getExpansionRange(Tok.getLocation()); 594 595 // Ignore tokens whose instantiation location was not the main file. 596 if (SM.getFileID(LLoc.getBegin()) != FID) { 597 TmpPP.Lex(Tok); 598 continue; 599 } 600 601 assert(SM.getFileID(LLoc.getEnd()) == FID && 602 "Start and end of expansion must be in the same ultimate file!"); 603 604 std::string Expansion = EscapeText(TmpPP.getSpelling(Tok)); 605 unsigned LineLen = Expansion.size(); 606 607 Token PrevPrevTok; 608 Token PrevTok = Tok; 609 // Okay, eat this token, getting the next one. 610 TmpPP.Lex(Tok); 611 612 // Skip all the rest of the tokens that are part of this macro 613 // instantiation. It would be really nice to pop up a window with all the 614 // spelling of the tokens or something. 615 while (!Tok.is(tok::eof) && 616 SM.getExpansionLoc(Tok.getLocation()) == LLoc.getBegin()) { 617 // Insert a newline if the macro expansion is getting large. 618 if (LineLen > 60) { 619 Expansion += "<br>"; 620 LineLen = 0; 621 } 622 623 LineLen -= Expansion.size(); 624 625 // If the tokens were already space separated, or if they must be to avoid 626 // them being implicitly pasted, add a space between them. 627 if (Tok.hasLeadingSpace() || 628 ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok)) 629 Expansion += ' '; 630 631 // Escape any special characters in the token text. 632 Expansion += EscapeText(TmpPP.getSpelling(Tok)); 633 LineLen += Expansion.size(); 634 635 PrevPrevTok = PrevTok; 636 PrevTok = Tok; 637 TmpPP.Lex(Tok); 638 } 639 640 641 // Insert the expansion as the end tag, so that multi-line macros all get 642 // highlighted. 643 Expansion = "<span class='expansion'>" + Expansion + "</span></span>"; 644 645 HighlightRange(R, LLoc.getBegin(), LLoc.getEnd(), "<span class='macro'>", 646 Expansion.c_str(), LLoc.isTokenRange()); 647 } 648 649 // Restore the preprocessor's old state. 650 TmpPP.setDiagnostics(*OldDiags); 651 TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled); 652 } 653