1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/AST/CommentParser.h" 10 #include "clang/AST/CommentCommandTraits.h" 11 #include "clang/AST/CommentDiagnostic.h" 12 #include "clang/AST/CommentSema.h" 13 #include "clang/Basic/CharInfo.h" 14 #include "clang/Basic/SourceManager.h" 15 #include "llvm/Support/ErrorHandling.h" 16 17 namespace clang { 18 19 static inline bool isWhitespace(llvm::StringRef S) { 20 for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) { 21 if (!isWhitespace(*I)) 22 return false; 23 } 24 return true; 25 } 26 27 namespace comments { 28 29 /// Re-lexes a sequence of tok::text tokens. 30 class TextTokenRetokenizer { 31 llvm::BumpPtrAllocator &Allocator; 32 Parser &P; 33 34 /// This flag is set when there are no more tokens we can fetch from lexer. 35 bool NoMoreInterestingTokens; 36 37 /// Token buffer: tokens we have processed and lookahead. 38 SmallVector<Token, 16> Toks; 39 40 /// A position in \c Toks. 41 struct Position { 42 const char *BufferStart; 43 const char *BufferEnd; 44 const char *BufferPtr; 45 SourceLocation BufferStartLoc; 46 unsigned CurToken; 47 }; 48 49 /// Current position in Toks. 50 Position Pos; 51 52 bool isEnd() const { 53 return Pos.CurToken >= Toks.size(); 54 } 55 56 /// Sets up the buffer pointers to point to current token. 57 void setupBuffer() { 58 assert(!isEnd()); 59 const Token &Tok = Toks[Pos.CurToken]; 60 61 Pos.BufferStart = Tok.getText().begin(); 62 Pos.BufferEnd = Tok.getText().end(); 63 Pos.BufferPtr = Pos.BufferStart; 64 Pos.BufferStartLoc = Tok.getLocation(); 65 } 66 67 SourceLocation getSourceLocation() const { 68 const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart; 69 return Pos.BufferStartLoc.getLocWithOffset(CharNo); 70 } 71 72 char peek() const { 73 assert(!isEnd()); 74 assert(Pos.BufferPtr != Pos.BufferEnd); 75 return *Pos.BufferPtr; 76 } 77 78 void consumeChar() { 79 assert(!isEnd()); 80 assert(Pos.BufferPtr != Pos.BufferEnd); 81 Pos.BufferPtr++; 82 if (Pos.BufferPtr == Pos.BufferEnd) { 83 Pos.CurToken++; 84 if (isEnd() && !addToken()) 85 return; 86 87 assert(!isEnd()); 88 setupBuffer(); 89 } 90 } 91 92 /// Extract a template type 93 bool lexTemplate(SmallString<32> &WordText) { 94 unsigned BracketCount = 0; 95 while (!isEnd()) { 96 const char C = peek(); 97 WordText.push_back(C); 98 consumeChar(); 99 switch (C) { 100 case '<': { 101 BracketCount++; 102 break; 103 } 104 case '>': { 105 BracketCount--; 106 if (!BracketCount) 107 return true; 108 break; 109 } 110 default: 111 break; 112 } 113 } 114 return false; 115 } 116 117 /// Add a token. 118 /// Returns true on success, false if there are no interesting tokens to 119 /// fetch from lexer. 120 bool addToken() { 121 if (NoMoreInterestingTokens) 122 return false; 123 124 if (P.Tok.is(tok::newline)) { 125 // If we see a single newline token between text tokens, skip it. 126 Token Newline = P.Tok; 127 P.consumeToken(); 128 if (P.Tok.isNot(tok::text)) { 129 P.putBack(Newline); 130 NoMoreInterestingTokens = true; 131 return false; 132 } 133 } 134 if (P.Tok.isNot(tok::text)) { 135 NoMoreInterestingTokens = true; 136 return false; 137 } 138 139 Toks.push_back(P.Tok); 140 P.consumeToken(); 141 if (Toks.size() == 1) 142 setupBuffer(); 143 return true; 144 } 145 146 void consumeWhitespace() { 147 while (!isEnd()) { 148 if (isWhitespace(peek())) 149 consumeChar(); 150 else 151 break; 152 } 153 } 154 155 void formTokenWithChars(Token &Result, 156 SourceLocation Loc, 157 const char *TokBegin, 158 unsigned TokLength, 159 StringRef Text) { 160 Result.setLocation(Loc); 161 Result.setKind(tok::text); 162 Result.setLength(TokLength); 163 #ifndef NDEBUG 164 Result.TextPtr = "<UNSET>"; 165 Result.IntVal = 7; 166 #endif 167 Result.setText(Text); 168 } 169 170 public: 171 TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P): 172 Allocator(Allocator), P(P), NoMoreInterestingTokens(false) { 173 Pos.CurToken = 0; 174 addToken(); 175 } 176 177 /// Extract a type argument 178 bool lexType(Token &Tok) { 179 if (isEnd()) 180 return false; 181 182 // Save current position in case we need to rollback because the type is 183 // empty. 184 Position SavedPos = Pos; 185 186 // Consume any leading whitespace. 187 consumeWhitespace(); 188 SmallString<32> WordText; 189 const char *WordBegin = Pos.BufferPtr; 190 SourceLocation Loc = getSourceLocation(); 191 192 while (!isEnd()) { 193 const char C = peek(); 194 // For non-whitespace characters we check if it's a template or otherwise 195 // continue reading the text into a word. 196 if (!isWhitespace(C)) { 197 if (C == '<') { 198 if (!lexTemplate(WordText)) 199 return false; 200 } else { 201 WordText.push_back(C); 202 consumeChar(); 203 } 204 } else { 205 consumeChar(); 206 break; 207 } 208 } 209 210 const unsigned Length = WordText.size(); 211 if (Length == 0) { 212 Pos = SavedPos; 213 return false; 214 } 215 216 char *TextPtr = Allocator.Allocate<char>(Length + 1); 217 218 memcpy(TextPtr, WordText.c_str(), Length + 1); 219 StringRef Text = StringRef(TextPtr, Length); 220 221 formTokenWithChars(Tok, Loc, WordBegin, Length, Text); 222 return true; 223 } 224 225 /// Extract a word -- sequence of non-whitespace characters. 226 bool lexWord(Token &Tok) { 227 if (isEnd()) 228 return false; 229 230 Position SavedPos = Pos; 231 232 consumeWhitespace(); 233 SmallString<32> WordText; 234 const char *WordBegin = Pos.BufferPtr; 235 SourceLocation Loc = getSourceLocation(); 236 while (!isEnd()) { 237 const char C = peek(); 238 if (!isWhitespace(C)) { 239 WordText.push_back(C); 240 consumeChar(); 241 } else 242 break; 243 } 244 const unsigned Length = WordText.size(); 245 if (Length == 0) { 246 Pos = SavedPos; 247 return false; 248 } 249 250 char *TextPtr = Allocator.Allocate<char>(Length + 1); 251 252 memcpy(TextPtr, WordText.c_str(), Length + 1); 253 StringRef Text = StringRef(TextPtr, Length); 254 255 formTokenWithChars(Tok, Loc, WordBegin, Length, Text); 256 return true; 257 } 258 259 bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) { 260 if (isEnd()) 261 return false; 262 263 Position SavedPos = Pos; 264 265 consumeWhitespace(); 266 SmallString<32> WordText; 267 const char *WordBegin = Pos.BufferPtr; 268 SourceLocation Loc = getSourceLocation(); 269 bool Error = false; 270 if (!isEnd()) { 271 const char C = peek(); 272 if (C == OpenDelim) { 273 WordText.push_back(C); 274 consumeChar(); 275 } else 276 Error = true; 277 } 278 char C = '\0'; 279 while (!Error && !isEnd()) { 280 C = peek(); 281 WordText.push_back(C); 282 consumeChar(); 283 if (C == CloseDelim) 284 break; 285 } 286 if (!Error && C != CloseDelim) 287 Error = true; 288 289 if (Error) { 290 Pos = SavedPos; 291 return false; 292 } 293 294 const unsigned Length = WordText.size(); 295 char *TextPtr = Allocator.Allocate<char>(Length + 1); 296 297 memcpy(TextPtr, WordText.c_str(), Length + 1); 298 StringRef Text = StringRef(TextPtr, Length); 299 300 formTokenWithChars(Tok, Loc, WordBegin, 301 Pos.BufferPtr - WordBegin, Text); 302 return true; 303 } 304 305 /// Put back tokens that we didn't consume. 306 void putBackLeftoverTokens() { 307 if (isEnd()) 308 return; 309 310 bool HavePartialTok = false; 311 Token PartialTok; 312 if (Pos.BufferPtr != Pos.BufferStart) { 313 formTokenWithChars(PartialTok, getSourceLocation(), 314 Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, 315 StringRef(Pos.BufferPtr, 316 Pos.BufferEnd - Pos.BufferPtr)); 317 HavePartialTok = true; 318 Pos.CurToken++; 319 } 320 321 P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end())); 322 Pos.CurToken = Toks.size(); 323 324 if (HavePartialTok) 325 P.putBack(PartialTok); 326 } 327 }; 328 329 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, 330 const SourceManager &SourceMgr, DiagnosticsEngine &Diags, 331 const CommandTraits &Traits): 332 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags), 333 Traits(Traits) { 334 consumeToken(); 335 } 336 337 void Parser::parseParamCommandArgs(ParamCommandComment *PC, 338 TextTokenRetokenizer &Retokenizer) { 339 Token Arg; 340 // Check if argument looks like direction specification: [dir] 341 // e.g., [in], [out], [in,out] 342 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) 343 S.actOnParamCommandDirectionArg(PC, 344 Arg.getLocation(), 345 Arg.getEndLocation(), 346 Arg.getText()); 347 348 if (Retokenizer.lexWord(Arg)) 349 S.actOnParamCommandParamNameArg(PC, 350 Arg.getLocation(), 351 Arg.getEndLocation(), 352 Arg.getText()); 353 } 354 355 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC, 356 TextTokenRetokenizer &Retokenizer) { 357 Token Arg; 358 if (Retokenizer.lexWord(Arg)) 359 S.actOnTParamCommandParamNameArg(TPC, 360 Arg.getLocation(), 361 Arg.getEndLocation(), 362 Arg.getText()); 363 } 364 365 ArrayRef<Comment::Argument> 366 Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) { 367 auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) 368 Comment::Argument[NumArgs]; 369 unsigned ParsedArgs = 0; 370 Token Arg; 371 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { 372 Args[ParsedArgs] = Comment::Argument{ 373 SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; 374 ParsedArgs++; 375 } 376 377 return llvm::ArrayRef(Args, ParsedArgs); 378 } 379 380 ArrayRef<Comment::Argument> 381 Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, 382 unsigned NumArgs) { 383 auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) 384 Comment::Argument[NumArgs]; 385 unsigned ParsedArgs = 0; 386 Token Arg; 387 388 while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) { 389 Args[ParsedArgs] = Comment::Argument{ 390 SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; 391 ParsedArgs++; 392 } 393 394 return llvm::ArrayRef(Args, ParsedArgs); 395 } 396 397 BlockCommandComment *Parser::parseBlockCommand() { 398 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); 399 400 ParamCommandComment *PC = nullptr; 401 TParamCommandComment *TPC = nullptr; 402 BlockCommandComment *BC = nullptr; 403 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 404 CommandMarkerKind CommandMarker = 405 Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At; 406 if (Info->IsParamCommand) { 407 PC = S.actOnParamCommandStart(Tok.getLocation(), 408 Tok.getEndLocation(), 409 Tok.getCommandID(), 410 CommandMarker); 411 } else if (Info->IsTParamCommand) { 412 TPC = S.actOnTParamCommandStart(Tok.getLocation(), 413 Tok.getEndLocation(), 414 Tok.getCommandID(), 415 CommandMarker); 416 } else { 417 BC = S.actOnBlockCommandStart(Tok.getLocation(), 418 Tok.getEndLocation(), 419 Tok.getCommandID(), 420 CommandMarker); 421 } 422 consumeToken(); 423 424 if (isTokBlockCommand()) { 425 // Block command ahead. We can't nest block commands, so pretend that this 426 // command has an empty argument. 427 ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt); 428 if (PC) { 429 S.actOnParamCommandFinish(PC, Paragraph); 430 return PC; 431 } else if (TPC) { 432 S.actOnTParamCommandFinish(TPC, Paragraph); 433 return TPC; 434 } else { 435 S.actOnBlockCommandFinish(BC, Paragraph); 436 return BC; 437 } 438 } 439 440 if (PC || TPC || Info->NumArgs > 0) { 441 // In order to parse command arguments we need to retokenize a few 442 // following text tokens. 443 TextTokenRetokenizer Retokenizer(Allocator, *this); 444 445 if (PC) 446 parseParamCommandArgs(PC, Retokenizer); 447 else if (TPC) 448 parseTParamCommandArgs(TPC, Retokenizer); 449 else if (Info->IsThrowsCommand) 450 S.actOnBlockCommandArgs( 451 BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs)); 452 else 453 S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs)); 454 455 Retokenizer.putBackLeftoverTokens(); 456 } 457 458 // If there's a block command ahead, we will attach an empty paragraph to 459 // this command. 460 bool EmptyParagraph = false; 461 if (isTokBlockCommand()) 462 EmptyParagraph = true; 463 else if (Tok.is(tok::newline)) { 464 Token PrevTok = Tok; 465 consumeToken(); 466 EmptyParagraph = isTokBlockCommand(); 467 putBack(PrevTok); 468 } 469 470 ParagraphComment *Paragraph; 471 if (EmptyParagraph) 472 Paragraph = S.actOnParagraphComment(std::nullopt); 473 else { 474 BlockContentComment *Block = parseParagraphOrBlockCommand(); 475 // Since we have checked for a block command, we should have parsed a 476 // paragraph. 477 Paragraph = cast<ParagraphComment>(Block); 478 } 479 480 if (PC) { 481 S.actOnParamCommandFinish(PC, Paragraph); 482 return PC; 483 } else if (TPC) { 484 S.actOnTParamCommandFinish(TPC, Paragraph); 485 return TPC; 486 } else { 487 S.actOnBlockCommandFinish(BC, Paragraph); 488 return BC; 489 } 490 } 491 492 InlineCommandComment *Parser::parseInlineCommand() { 493 assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); 494 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 495 496 const Token CommandTok = Tok; 497 consumeToken(); 498 499 TextTokenRetokenizer Retokenizer(Allocator, *this); 500 ArrayRef<Comment::Argument> Args = 501 parseCommandArgs(Retokenizer, Info->NumArgs); 502 503 InlineCommandComment *IC = S.actOnInlineCommand( 504 CommandTok.getLocation(), CommandTok.getEndLocation(), 505 CommandTok.getCommandID(), Args); 506 507 if (Args.size() < Info->NumArgs) { 508 Diag(CommandTok.getEndLocation().getLocWithOffset(1), 509 diag::warn_doc_inline_command_not_enough_arguments) 510 << CommandTok.is(tok::at_command) << Info->Name << Args.size() 511 << Info->NumArgs 512 << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation()); 513 } 514 515 Retokenizer.putBackLeftoverTokens(); 516 517 return IC; 518 } 519 520 HTMLStartTagComment *Parser::parseHTMLStartTag() { 521 assert(Tok.is(tok::html_start_tag)); 522 HTMLStartTagComment *HST = 523 S.actOnHTMLStartTagStart(Tok.getLocation(), 524 Tok.getHTMLTagStartName()); 525 consumeToken(); 526 527 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; 528 while (true) { 529 switch (Tok.getKind()) { 530 case tok::html_ident: { 531 Token Ident = Tok; 532 consumeToken(); 533 if (Tok.isNot(tok::html_equals)) { 534 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 535 Ident.getHTMLIdent())); 536 continue; 537 } 538 Token Equals = Tok; 539 consumeToken(); 540 if (Tok.isNot(tok::html_quoted_string)) { 541 Diag(Tok.getLocation(), 542 diag::warn_doc_html_start_tag_expected_quoted_string) 543 << SourceRange(Equals.getLocation()); 544 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 545 Ident.getHTMLIdent())); 546 while (Tok.is(tok::html_equals) || 547 Tok.is(tok::html_quoted_string)) 548 consumeToken(); 549 continue; 550 } 551 Attrs.push_back(HTMLStartTagComment::Attribute( 552 Ident.getLocation(), 553 Ident.getHTMLIdent(), 554 Equals.getLocation(), 555 SourceRange(Tok.getLocation(), 556 Tok.getEndLocation()), 557 Tok.getHTMLQuotedString())); 558 consumeToken(); 559 continue; 560 } 561 562 case tok::html_greater: 563 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 564 Tok.getLocation(), 565 /* IsSelfClosing = */ false); 566 consumeToken(); 567 return HST; 568 569 case tok::html_slash_greater: 570 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 571 Tok.getLocation(), 572 /* IsSelfClosing = */ true); 573 consumeToken(); 574 return HST; 575 576 case tok::html_equals: 577 case tok::html_quoted_string: 578 Diag(Tok.getLocation(), 579 diag::warn_doc_html_start_tag_expected_ident_or_greater); 580 while (Tok.is(tok::html_equals) || 581 Tok.is(tok::html_quoted_string)) 582 consumeToken(); 583 if (Tok.is(tok::html_ident) || 584 Tok.is(tok::html_greater) || 585 Tok.is(tok::html_slash_greater)) 586 continue; 587 588 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 589 SourceLocation(), 590 /* IsSelfClosing = */ false); 591 return HST; 592 593 default: 594 // Not a token from an HTML start tag. Thus HTML tag prematurely ended. 595 S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 596 SourceLocation(), 597 /* IsSelfClosing = */ false); 598 bool StartLineInvalid; 599 const unsigned StartLine = SourceMgr.getPresumedLineNumber( 600 HST->getLocation(), 601 &StartLineInvalid); 602 bool EndLineInvalid; 603 const unsigned EndLine = SourceMgr.getPresumedLineNumber( 604 Tok.getLocation(), 605 &EndLineInvalid); 606 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) 607 Diag(Tok.getLocation(), 608 diag::warn_doc_html_start_tag_expected_ident_or_greater) 609 << HST->getSourceRange(); 610 else { 611 Diag(Tok.getLocation(), 612 diag::warn_doc_html_start_tag_expected_ident_or_greater); 613 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) 614 << HST->getSourceRange(); 615 } 616 return HST; 617 } 618 } 619 } 620 621 HTMLEndTagComment *Parser::parseHTMLEndTag() { 622 assert(Tok.is(tok::html_end_tag)); 623 Token TokEndTag = Tok; 624 consumeToken(); 625 SourceLocation Loc; 626 if (Tok.is(tok::html_greater)) { 627 Loc = Tok.getLocation(); 628 consumeToken(); 629 } 630 631 return S.actOnHTMLEndTag(TokEndTag.getLocation(), 632 Loc, 633 TokEndTag.getHTMLTagEndName()); 634 } 635 636 BlockContentComment *Parser::parseParagraphOrBlockCommand() { 637 SmallVector<InlineContentComment *, 8> Content; 638 639 while (true) { 640 switch (Tok.getKind()) { 641 case tok::verbatim_block_begin: 642 case tok::verbatim_line_name: 643 case tok::eof: 644 break; // Block content or EOF ahead, finish this parapgaph. 645 646 case tok::unknown_command: 647 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 648 Tok.getEndLocation(), 649 Tok.getUnknownCommandName())); 650 consumeToken(); 651 continue; 652 653 case tok::backslash_command: 654 case tok::at_command: { 655 const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 656 if (Info->IsBlockCommand) { 657 if (Content.size() == 0) 658 return parseBlockCommand(); 659 break; // Block command ahead, finish this parapgaph. 660 } 661 if (Info->IsVerbatimBlockEndCommand) { 662 Diag(Tok.getLocation(), 663 diag::warn_verbatim_block_end_without_start) 664 << Tok.is(tok::at_command) 665 << Info->Name 666 << SourceRange(Tok.getLocation(), Tok.getEndLocation()); 667 consumeToken(); 668 continue; 669 } 670 if (Info->IsUnknownCommand) { 671 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 672 Tok.getEndLocation(), 673 Info->getID())); 674 consumeToken(); 675 continue; 676 } 677 assert(Info->IsInlineCommand); 678 Content.push_back(parseInlineCommand()); 679 continue; 680 } 681 682 case tok::newline: { 683 consumeToken(); 684 if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 685 consumeToken(); 686 break; // Two newlines -- end of paragraph. 687 } 688 // Also allow [tok::newline, tok::text, tok::newline] if the middle 689 // tok::text is just whitespace. 690 if (Tok.is(tok::text) && isWhitespace(Tok.getText())) { 691 Token WhitespaceTok = Tok; 692 consumeToken(); 693 if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 694 consumeToken(); 695 break; 696 } 697 // We have [tok::newline, tok::text, non-newline]. Put back tok::text. 698 putBack(WhitespaceTok); 699 } 700 if (Content.size() > 0) 701 Content.back()->addTrailingNewline(); 702 continue; 703 } 704 705 // Don't deal with HTML tag soup now. 706 case tok::html_start_tag: 707 Content.push_back(parseHTMLStartTag()); 708 continue; 709 710 case tok::html_end_tag: 711 Content.push_back(parseHTMLEndTag()); 712 continue; 713 714 case tok::text: 715 Content.push_back(S.actOnText(Tok.getLocation(), 716 Tok.getEndLocation(), 717 Tok.getText())); 718 consumeToken(); 719 continue; 720 721 case tok::verbatim_block_line: 722 case tok::verbatim_block_end: 723 case tok::verbatim_line_text: 724 case tok::html_ident: 725 case tok::html_equals: 726 case tok::html_quoted_string: 727 case tok::html_greater: 728 case tok::html_slash_greater: 729 llvm_unreachable("should not see this token"); 730 } 731 break; 732 } 733 734 return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content))); 735 } 736 737 VerbatimBlockComment *Parser::parseVerbatimBlock() { 738 assert(Tok.is(tok::verbatim_block_begin)); 739 740 VerbatimBlockComment *VB = 741 S.actOnVerbatimBlockStart(Tok.getLocation(), 742 Tok.getVerbatimBlockID()); 743 consumeToken(); 744 745 // Don't create an empty line if verbatim opening command is followed 746 // by a newline. 747 if (Tok.is(tok::newline)) 748 consumeToken(); 749 750 SmallVector<VerbatimBlockLineComment *, 8> Lines; 751 while (Tok.is(tok::verbatim_block_line) || 752 Tok.is(tok::newline)) { 753 VerbatimBlockLineComment *Line; 754 if (Tok.is(tok::verbatim_block_line)) { 755 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 756 Tok.getVerbatimBlockText()); 757 consumeToken(); 758 if (Tok.is(tok::newline)) { 759 consumeToken(); 760 } 761 } else { 762 // Empty line, just a tok::newline. 763 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); 764 consumeToken(); 765 } 766 Lines.push_back(Line); 767 } 768 769 if (Tok.is(tok::verbatim_block_end)) { 770 const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID()); 771 S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name, 772 S.copyArray(llvm::ArrayRef(Lines))); 773 consumeToken(); 774 } else { 775 // Unterminated \\verbatim block 776 S.actOnVerbatimBlockFinish(VB, SourceLocation(), "", 777 S.copyArray(llvm::ArrayRef(Lines))); 778 } 779 780 return VB; 781 } 782 783 VerbatimLineComment *Parser::parseVerbatimLine() { 784 assert(Tok.is(tok::verbatim_line_name)); 785 786 Token NameTok = Tok; 787 consumeToken(); 788 789 SourceLocation TextBegin; 790 StringRef Text; 791 // Next token might not be a tok::verbatim_line_text if verbatim line 792 // starting command comes just before a newline or comment end. 793 if (Tok.is(tok::verbatim_line_text)) { 794 TextBegin = Tok.getLocation(); 795 Text = Tok.getVerbatimLineText(); 796 } else { 797 TextBegin = NameTok.getEndLocation(); 798 Text = ""; 799 } 800 801 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), 802 NameTok.getVerbatimLineID(), 803 TextBegin, 804 Text); 805 consumeToken(); 806 return VL; 807 } 808 809 BlockContentComment *Parser::parseBlockContent() { 810 switch (Tok.getKind()) { 811 case tok::text: 812 case tok::unknown_command: 813 case tok::backslash_command: 814 case tok::at_command: 815 case tok::html_start_tag: 816 case tok::html_end_tag: 817 return parseParagraphOrBlockCommand(); 818 819 case tok::verbatim_block_begin: 820 return parseVerbatimBlock(); 821 822 case tok::verbatim_line_name: 823 return parseVerbatimLine(); 824 825 case tok::eof: 826 case tok::newline: 827 case tok::verbatim_block_line: 828 case tok::verbatim_block_end: 829 case tok::verbatim_line_text: 830 case tok::html_ident: 831 case tok::html_equals: 832 case tok::html_quoted_string: 833 case tok::html_greater: 834 case tok::html_slash_greater: 835 llvm_unreachable("should not see this token"); 836 } 837 llvm_unreachable("bogus token kind"); 838 } 839 840 FullComment *Parser::parseFullComment() { 841 // Skip newlines at the beginning of the comment. 842 while (Tok.is(tok::newline)) 843 consumeToken(); 844 845 SmallVector<BlockContentComment *, 8> Blocks; 846 while (Tok.isNot(tok::eof)) { 847 Blocks.push_back(parseBlockContent()); 848 849 // Skip extra newlines after paragraph end. 850 while (Tok.is(tok::newline)) 851 consumeToken(); 852 } 853 return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks))); 854 } 855 856 } // end namespace comments 857 } // end namespace clang 858