xref: /llvm-project/clang/lib/AST/CommentParser.cpp (revision fade04f81da9db974204b9d7c58b4affd0422d6e)
1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/AST/CommentParser.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/AST/CommentDiagnostic.h"
12 #include "clang/AST/CommentSema.h"
13 #include "clang/Basic/CharInfo.h"
14 #include "clang/Basic/SourceManager.h"
15 #include "llvm/Support/ErrorHandling.h"
16 
17 namespace clang {
18 
19 static inline bool isWhitespace(llvm::StringRef S) {
20   for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21     if (!isWhitespace(*I))
22       return false;
23   }
24   return true;
25 }
26 
27 namespace comments {
28 
29 /// Re-lexes a sequence of tok::text tokens.
30 class TextTokenRetokenizer {
31   llvm::BumpPtrAllocator &Allocator;
32   Parser &P;
33 
34   /// This flag is set when there are no more tokens we can fetch from lexer.
35   bool NoMoreInterestingTokens;
36 
37   /// Token buffer: tokens we have processed and lookahead.
38   SmallVector<Token, 16> Toks;
39 
40   /// A position in \c Toks.
41   struct Position {
42     const char *BufferStart;
43     const char *BufferEnd;
44     const char *BufferPtr;
45     SourceLocation BufferStartLoc;
46     unsigned CurToken;
47   };
48 
49   /// Current position in Toks.
50   Position Pos;
51 
52   bool isEnd() const {
53     return Pos.CurToken >= Toks.size();
54   }
55 
56   /// Sets up the buffer pointers to point to current token.
57   void setupBuffer() {
58     assert(!isEnd());
59     const Token &Tok = Toks[Pos.CurToken];
60 
61     Pos.BufferStart = Tok.getText().begin();
62     Pos.BufferEnd = Tok.getText().end();
63     Pos.BufferPtr = Pos.BufferStart;
64     Pos.BufferStartLoc = Tok.getLocation();
65   }
66 
67   SourceLocation getSourceLocation() const {
68     const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69     return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70   }
71 
72   char peek() const {
73     assert(!isEnd());
74     assert(Pos.BufferPtr != Pos.BufferEnd);
75     return *Pos.BufferPtr;
76   }
77 
78   void consumeChar() {
79     assert(!isEnd());
80     assert(Pos.BufferPtr != Pos.BufferEnd);
81     Pos.BufferPtr++;
82     if (Pos.BufferPtr == Pos.BufferEnd) {
83       Pos.CurToken++;
84       if (isEnd() && !addToken())
85         return;
86 
87       assert(!isEnd());
88       setupBuffer();
89     }
90   }
91 
92   /// Extract a template type
93   bool lexTemplate(SmallString<32> &WordText) {
94     unsigned BracketCount = 0;
95     while (!isEnd()) {
96       const char C = peek();
97       WordText.push_back(C);
98       consumeChar();
99       switch (C) {
100       case '<': {
101         BracketCount++;
102         break;
103       }
104       case '>': {
105         BracketCount--;
106         if (!BracketCount)
107           return true;
108         break;
109       }
110       default:
111         break;
112       }
113     }
114     return false;
115   }
116 
117   /// Add a token.
118   /// Returns true on success, false if there are no interesting tokens to
119   /// fetch from lexer.
120   bool addToken() {
121     if (NoMoreInterestingTokens)
122       return false;
123 
124     if (P.Tok.is(tok::newline)) {
125       // If we see a single newline token between text tokens, skip it.
126       Token Newline = P.Tok;
127       P.consumeToken();
128       if (P.Tok.isNot(tok::text)) {
129         P.putBack(Newline);
130         NoMoreInterestingTokens = true;
131         return false;
132       }
133     }
134     if (P.Tok.isNot(tok::text)) {
135       NoMoreInterestingTokens = true;
136       return false;
137     }
138 
139     Toks.push_back(P.Tok);
140     P.consumeToken();
141     if (Toks.size() == 1)
142       setupBuffer();
143     return true;
144   }
145 
146   void consumeWhitespace() {
147     while (!isEnd()) {
148       if (isWhitespace(peek()))
149         consumeChar();
150       else
151         break;
152     }
153   }
154 
155   void formTokenWithChars(Token &Result,
156                           SourceLocation Loc,
157                           const char *TokBegin,
158                           unsigned TokLength,
159                           StringRef Text) {
160     Result.setLocation(Loc);
161     Result.setKind(tok::text);
162     Result.setLength(TokLength);
163 #ifndef NDEBUG
164     Result.TextPtr = "<UNSET>";
165     Result.IntVal = 7;
166 #endif
167     Result.setText(Text);
168   }
169 
170 public:
171   TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
172       Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
173     Pos.CurToken = 0;
174     addToken();
175   }
176 
177   /// Extract a type argument
178   bool lexType(Token &Tok) {
179     if (isEnd())
180       return false;
181 
182     // Save current position in case we need to rollback because the type is
183     // empty.
184     Position SavedPos = Pos;
185 
186     // Consume any leading whitespace.
187     consumeWhitespace();
188     SmallString<32> WordText;
189     const char *WordBegin = Pos.BufferPtr;
190     SourceLocation Loc = getSourceLocation();
191 
192     while (!isEnd()) {
193       const char C = peek();
194       // For non-whitespace characters we check if it's a template or otherwise
195       // continue reading the text into a word.
196       if (!isWhitespace(C)) {
197         if (C == '<') {
198           if (!lexTemplate(WordText))
199             return false;
200         } else {
201           WordText.push_back(C);
202           consumeChar();
203         }
204       } else {
205         consumeChar();
206         break;
207       }
208     }
209 
210     const unsigned Length = WordText.size();
211     if (Length == 0) {
212       Pos = SavedPos;
213       return false;
214     }
215 
216     char *TextPtr = Allocator.Allocate<char>(Length + 1);
217 
218     memcpy(TextPtr, WordText.c_str(), Length + 1);
219     StringRef Text = StringRef(TextPtr, Length);
220 
221     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
222     return true;
223   }
224 
225   /// Extract a word -- sequence of non-whitespace characters.
226   bool lexWord(Token &Tok) {
227     if (isEnd())
228       return false;
229 
230     Position SavedPos = Pos;
231 
232     consumeWhitespace();
233     SmallString<32> WordText;
234     const char *WordBegin = Pos.BufferPtr;
235     SourceLocation Loc = getSourceLocation();
236     while (!isEnd()) {
237       const char C = peek();
238       if (!isWhitespace(C)) {
239         WordText.push_back(C);
240         consumeChar();
241       } else
242         break;
243     }
244     const unsigned Length = WordText.size();
245     if (Length == 0) {
246       Pos = SavedPos;
247       return false;
248     }
249 
250     char *TextPtr = Allocator.Allocate<char>(Length + 1);
251 
252     memcpy(TextPtr, WordText.c_str(), Length + 1);
253     StringRef Text = StringRef(TextPtr, Length);
254 
255     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
256     return true;
257   }
258 
259   bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
260     if (isEnd())
261       return false;
262 
263     Position SavedPos = Pos;
264 
265     consumeWhitespace();
266     SmallString<32> WordText;
267     const char *WordBegin = Pos.BufferPtr;
268     SourceLocation Loc = getSourceLocation();
269     bool Error = false;
270     if (!isEnd()) {
271       const char C = peek();
272       if (C == OpenDelim) {
273         WordText.push_back(C);
274         consumeChar();
275       } else
276         Error = true;
277     }
278     char C = '\0';
279     while (!Error && !isEnd()) {
280       C = peek();
281       WordText.push_back(C);
282       consumeChar();
283       if (C == CloseDelim)
284         break;
285     }
286     if (!Error && C != CloseDelim)
287       Error = true;
288 
289     if (Error) {
290       Pos = SavedPos;
291       return false;
292     }
293 
294     const unsigned Length = WordText.size();
295     char *TextPtr = Allocator.Allocate<char>(Length + 1);
296 
297     memcpy(TextPtr, WordText.c_str(), Length + 1);
298     StringRef Text = StringRef(TextPtr, Length);
299 
300     formTokenWithChars(Tok, Loc, WordBegin,
301                        Pos.BufferPtr - WordBegin, Text);
302     return true;
303   }
304 
305   /// Put back tokens that we didn't consume.
306   void putBackLeftoverTokens() {
307     if (isEnd())
308       return;
309 
310     bool HavePartialTok = false;
311     Token PartialTok;
312     if (Pos.BufferPtr != Pos.BufferStart) {
313       formTokenWithChars(PartialTok, getSourceLocation(),
314                          Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
315                          StringRef(Pos.BufferPtr,
316                                    Pos.BufferEnd - Pos.BufferPtr));
317       HavePartialTok = true;
318       Pos.CurToken++;
319     }
320 
321     P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
322     Pos.CurToken = Toks.size();
323 
324     if (HavePartialTok)
325       P.putBack(PartialTok);
326   }
327 };
328 
329 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
330                const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
331                const CommandTraits &Traits):
332     L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
333     Traits(Traits) {
334   consumeToken();
335 }
336 
337 void Parser::parseParamCommandArgs(ParamCommandComment *PC,
338                                    TextTokenRetokenizer &Retokenizer) {
339   Token Arg;
340   // Check if argument looks like direction specification: [dir]
341   // e.g., [in], [out], [in,out]
342   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
343     S.actOnParamCommandDirectionArg(PC,
344                                     Arg.getLocation(),
345                                     Arg.getEndLocation(),
346                                     Arg.getText());
347 
348   if (Retokenizer.lexWord(Arg))
349     S.actOnParamCommandParamNameArg(PC,
350                                     Arg.getLocation(),
351                                     Arg.getEndLocation(),
352                                     Arg.getText());
353 }
354 
355 void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
356                                     TextTokenRetokenizer &Retokenizer) {
357   Token Arg;
358   if (Retokenizer.lexWord(Arg))
359     S.actOnTParamCommandParamNameArg(TPC,
360                                      Arg.getLocation(),
361                                      Arg.getEndLocation(),
362                                      Arg.getText());
363 }
364 
365 ArrayRef<Comment::Argument>
366 Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
367   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
368       Comment::Argument[NumArgs];
369   unsigned ParsedArgs = 0;
370   Token Arg;
371   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
372     Args[ParsedArgs] = Comment::Argument{
373         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
374     ParsedArgs++;
375   }
376 
377   return llvm::ArrayRef(Args, ParsedArgs);
378 }
379 
380 ArrayRef<Comment::Argument>
381 Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
382                               unsigned NumArgs) {
383   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
384       Comment::Argument[NumArgs];
385   unsigned ParsedArgs = 0;
386   Token Arg;
387 
388   while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) {
389     Args[ParsedArgs] = Comment::Argument{
390         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
391     ParsedArgs++;
392   }
393 
394   return llvm::ArrayRef(Args, ParsedArgs);
395 }
396 
397 BlockCommandComment *Parser::parseBlockCommand() {
398   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
399 
400   ParamCommandComment *PC = nullptr;
401   TParamCommandComment *TPC = nullptr;
402   BlockCommandComment *BC = nullptr;
403   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
404   CommandMarkerKind CommandMarker =
405       Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
406   if (Info->IsParamCommand) {
407     PC = S.actOnParamCommandStart(Tok.getLocation(),
408                                   Tok.getEndLocation(),
409                                   Tok.getCommandID(),
410                                   CommandMarker);
411   } else if (Info->IsTParamCommand) {
412     TPC = S.actOnTParamCommandStart(Tok.getLocation(),
413                                     Tok.getEndLocation(),
414                                     Tok.getCommandID(),
415                                     CommandMarker);
416   } else {
417     BC = S.actOnBlockCommandStart(Tok.getLocation(),
418                                   Tok.getEndLocation(),
419                                   Tok.getCommandID(),
420                                   CommandMarker);
421   }
422   consumeToken();
423 
424   if (isTokBlockCommand()) {
425     // Block command ahead.  We can't nest block commands, so pretend that this
426     // command has an empty argument.
427     ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt);
428     if (PC) {
429       S.actOnParamCommandFinish(PC, Paragraph);
430       return PC;
431     } else if (TPC) {
432       S.actOnTParamCommandFinish(TPC, Paragraph);
433       return TPC;
434     } else {
435       S.actOnBlockCommandFinish(BC, Paragraph);
436       return BC;
437     }
438   }
439 
440   if (PC || TPC || Info->NumArgs > 0) {
441     // In order to parse command arguments we need to retokenize a few
442     // following text tokens.
443     TextTokenRetokenizer Retokenizer(Allocator, *this);
444 
445     if (PC)
446       parseParamCommandArgs(PC, Retokenizer);
447     else if (TPC)
448       parseTParamCommandArgs(TPC, Retokenizer);
449     else if (Info->IsThrowsCommand)
450       S.actOnBlockCommandArgs(
451           BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
452     else
453       S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
454 
455     Retokenizer.putBackLeftoverTokens();
456   }
457 
458   // If there's a block command ahead, we will attach an empty paragraph to
459   // this command.
460   bool EmptyParagraph = false;
461   if (isTokBlockCommand())
462     EmptyParagraph = true;
463   else if (Tok.is(tok::newline)) {
464     Token PrevTok = Tok;
465     consumeToken();
466     EmptyParagraph = isTokBlockCommand();
467     putBack(PrevTok);
468   }
469 
470   ParagraphComment *Paragraph;
471   if (EmptyParagraph)
472     Paragraph = S.actOnParagraphComment(std::nullopt);
473   else {
474     BlockContentComment *Block = parseParagraphOrBlockCommand();
475     // Since we have checked for a block command, we should have parsed a
476     // paragraph.
477     Paragraph = cast<ParagraphComment>(Block);
478   }
479 
480   if (PC) {
481     S.actOnParamCommandFinish(PC, Paragraph);
482     return PC;
483   } else if (TPC) {
484     S.actOnTParamCommandFinish(TPC, Paragraph);
485     return TPC;
486   } else {
487     S.actOnBlockCommandFinish(BC, Paragraph);
488     return BC;
489   }
490 }
491 
492 InlineCommandComment *Parser::parseInlineCommand() {
493   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
494   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
495 
496   const Token CommandTok = Tok;
497   consumeToken();
498 
499   TextTokenRetokenizer Retokenizer(Allocator, *this);
500   ArrayRef<Comment::Argument> Args =
501       parseCommandArgs(Retokenizer, Info->NumArgs);
502 
503   InlineCommandComment *IC = S.actOnInlineCommand(
504       CommandTok.getLocation(), CommandTok.getEndLocation(),
505       CommandTok.getCommandID(), Args);
506 
507   if (Args.size() < Info->NumArgs) {
508     Diag(CommandTok.getEndLocation().getLocWithOffset(1),
509          diag::warn_doc_inline_command_not_enough_arguments)
510         << CommandTok.is(tok::at_command) << Info->Name << Args.size()
511         << Info->NumArgs
512         << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
513   }
514 
515   Retokenizer.putBackLeftoverTokens();
516 
517   return IC;
518 }
519 
520 HTMLStartTagComment *Parser::parseHTMLStartTag() {
521   assert(Tok.is(tok::html_start_tag));
522   HTMLStartTagComment *HST =
523       S.actOnHTMLStartTagStart(Tok.getLocation(),
524                                Tok.getHTMLTagStartName());
525   consumeToken();
526 
527   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
528   while (true) {
529     switch (Tok.getKind()) {
530     case tok::html_ident: {
531       Token Ident = Tok;
532       consumeToken();
533       if (Tok.isNot(tok::html_equals)) {
534         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
535                                                        Ident.getHTMLIdent()));
536         continue;
537       }
538       Token Equals = Tok;
539       consumeToken();
540       if (Tok.isNot(tok::html_quoted_string)) {
541         Diag(Tok.getLocation(),
542              diag::warn_doc_html_start_tag_expected_quoted_string)
543           << SourceRange(Equals.getLocation());
544         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
545                                                        Ident.getHTMLIdent()));
546         while (Tok.is(tok::html_equals) ||
547                Tok.is(tok::html_quoted_string))
548           consumeToken();
549         continue;
550       }
551       Attrs.push_back(HTMLStartTagComment::Attribute(
552                               Ident.getLocation(),
553                               Ident.getHTMLIdent(),
554                               Equals.getLocation(),
555                               SourceRange(Tok.getLocation(),
556                                           Tok.getEndLocation()),
557                               Tok.getHTMLQuotedString()));
558       consumeToken();
559       continue;
560     }
561 
562     case tok::html_greater:
563       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
564                                 Tok.getLocation(),
565                                 /* IsSelfClosing = */ false);
566       consumeToken();
567       return HST;
568 
569     case tok::html_slash_greater:
570       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
571                                 Tok.getLocation(),
572                                 /* IsSelfClosing = */ true);
573       consumeToken();
574       return HST;
575 
576     case tok::html_equals:
577     case tok::html_quoted_string:
578       Diag(Tok.getLocation(),
579            diag::warn_doc_html_start_tag_expected_ident_or_greater);
580       while (Tok.is(tok::html_equals) ||
581              Tok.is(tok::html_quoted_string))
582         consumeToken();
583       if (Tok.is(tok::html_ident) ||
584           Tok.is(tok::html_greater) ||
585           Tok.is(tok::html_slash_greater))
586         continue;
587 
588       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
589                                 SourceLocation(),
590                                 /* IsSelfClosing = */ false);
591       return HST;
592 
593     default:
594       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
595       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
596                                 SourceLocation(),
597                                 /* IsSelfClosing = */ false);
598       bool StartLineInvalid;
599       const unsigned StartLine = SourceMgr.getPresumedLineNumber(
600                                                   HST->getLocation(),
601                                                   &StartLineInvalid);
602       bool EndLineInvalid;
603       const unsigned EndLine = SourceMgr.getPresumedLineNumber(
604                                                   Tok.getLocation(),
605                                                   &EndLineInvalid);
606       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
607         Diag(Tok.getLocation(),
608              diag::warn_doc_html_start_tag_expected_ident_or_greater)
609           << HST->getSourceRange();
610       else {
611         Diag(Tok.getLocation(),
612              diag::warn_doc_html_start_tag_expected_ident_or_greater);
613         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
614           << HST->getSourceRange();
615       }
616       return HST;
617     }
618   }
619 }
620 
621 HTMLEndTagComment *Parser::parseHTMLEndTag() {
622   assert(Tok.is(tok::html_end_tag));
623   Token TokEndTag = Tok;
624   consumeToken();
625   SourceLocation Loc;
626   if (Tok.is(tok::html_greater)) {
627     Loc = Tok.getLocation();
628     consumeToken();
629   }
630 
631   return S.actOnHTMLEndTag(TokEndTag.getLocation(),
632                            Loc,
633                            TokEndTag.getHTMLTagEndName());
634 }
635 
636 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
637   SmallVector<InlineContentComment *, 8> Content;
638 
639   while (true) {
640     switch (Tok.getKind()) {
641     case tok::verbatim_block_begin:
642     case tok::verbatim_line_name:
643     case tok::eof:
644       break; // Block content or EOF ahead, finish this parapgaph.
645 
646     case tok::unknown_command:
647       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
648                                               Tok.getEndLocation(),
649                                               Tok.getUnknownCommandName()));
650       consumeToken();
651       continue;
652 
653     case tok::backslash_command:
654     case tok::at_command: {
655       const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
656       if (Info->IsBlockCommand) {
657         if (Content.size() == 0)
658           return parseBlockCommand();
659         break; // Block command ahead, finish this parapgaph.
660       }
661       if (Info->IsVerbatimBlockEndCommand) {
662         Diag(Tok.getLocation(),
663              diag::warn_verbatim_block_end_without_start)
664           << Tok.is(tok::at_command)
665           << Info->Name
666           << SourceRange(Tok.getLocation(), Tok.getEndLocation());
667         consumeToken();
668         continue;
669       }
670       if (Info->IsUnknownCommand) {
671         Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
672                                                 Tok.getEndLocation(),
673                                                 Info->getID()));
674         consumeToken();
675         continue;
676       }
677       assert(Info->IsInlineCommand);
678       Content.push_back(parseInlineCommand());
679       continue;
680     }
681 
682     case tok::newline: {
683       consumeToken();
684       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
685         consumeToken();
686         break; // Two newlines -- end of paragraph.
687       }
688       // Also allow [tok::newline, tok::text, tok::newline] if the middle
689       // tok::text is just whitespace.
690       if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
691         Token WhitespaceTok = Tok;
692         consumeToken();
693         if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
694           consumeToken();
695           break;
696         }
697         // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
698         putBack(WhitespaceTok);
699       }
700       if (Content.size() > 0)
701         Content.back()->addTrailingNewline();
702       continue;
703     }
704 
705     // Don't deal with HTML tag soup now.
706     case tok::html_start_tag:
707       Content.push_back(parseHTMLStartTag());
708       continue;
709 
710     case tok::html_end_tag:
711       Content.push_back(parseHTMLEndTag());
712       continue;
713 
714     case tok::text:
715       Content.push_back(S.actOnText(Tok.getLocation(),
716                                     Tok.getEndLocation(),
717                                     Tok.getText()));
718       consumeToken();
719       continue;
720 
721     case tok::verbatim_block_line:
722     case tok::verbatim_block_end:
723     case tok::verbatim_line_text:
724     case tok::html_ident:
725     case tok::html_equals:
726     case tok::html_quoted_string:
727     case tok::html_greater:
728     case tok::html_slash_greater:
729       llvm_unreachable("should not see this token");
730     }
731     break;
732   }
733 
734   return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));
735 }
736 
737 VerbatimBlockComment *Parser::parseVerbatimBlock() {
738   assert(Tok.is(tok::verbatim_block_begin));
739 
740   VerbatimBlockComment *VB =
741       S.actOnVerbatimBlockStart(Tok.getLocation(),
742                                 Tok.getVerbatimBlockID());
743   consumeToken();
744 
745   // Don't create an empty line if verbatim opening command is followed
746   // by a newline.
747   if (Tok.is(tok::newline))
748     consumeToken();
749 
750   SmallVector<VerbatimBlockLineComment *, 8> Lines;
751   while (Tok.is(tok::verbatim_block_line) ||
752          Tok.is(tok::newline)) {
753     VerbatimBlockLineComment *Line;
754     if (Tok.is(tok::verbatim_block_line)) {
755       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
756                                       Tok.getVerbatimBlockText());
757       consumeToken();
758       if (Tok.is(tok::newline)) {
759         consumeToken();
760       }
761     } else {
762       // Empty line, just a tok::newline.
763       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
764       consumeToken();
765     }
766     Lines.push_back(Line);
767   }
768 
769   if (Tok.is(tok::verbatim_block_end)) {
770     const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
771     S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name,
772                                S.copyArray(llvm::ArrayRef(Lines)));
773     consumeToken();
774   } else {
775     // Unterminated \\verbatim block
776     S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
777                                S.copyArray(llvm::ArrayRef(Lines)));
778   }
779 
780   return VB;
781 }
782 
783 VerbatimLineComment *Parser::parseVerbatimLine() {
784   assert(Tok.is(tok::verbatim_line_name));
785 
786   Token NameTok = Tok;
787   consumeToken();
788 
789   SourceLocation TextBegin;
790   StringRef Text;
791   // Next token might not be a tok::verbatim_line_text if verbatim line
792   // starting command comes just before a newline or comment end.
793   if (Tok.is(tok::verbatim_line_text)) {
794     TextBegin = Tok.getLocation();
795     Text = Tok.getVerbatimLineText();
796   } else {
797     TextBegin = NameTok.getEndLocation();
798     Text = "";
799   }
800 
801   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
802                                                 NameTok.getVerbatimLineID(),
803                                                 TextBegin,
804                                                 Text);
805   consumeToken();
806   return VL;
807 }
808 
809 BlockContentComment *Parser::parseBlockContent() {
810   switch (Tok.getKind()) {
811   case tok::text:
812   case tok::unknown_command:
813   case tok::backslash_command:
814   case tok::at_command:
815   case tok::html_start_tag:
816   case tok::html_end_tag:
817     return parseParagraphOrBlockCommand();
818 
819   case tok::verbatim_block_begin:
820     return parseVerbatimBlock();
821 
822   case tok::verbatim_line_name:
823     return parseVerbatimLine();
824 
825   case tok::eof:
826   case tok::newline:
827   case tok::verbatim_block_line:
828   case tok::verbatim_block_end:
829   case tok::verbatim_line_text:
830   case tok::html_ident:
831   case tok::html_equals:
832   case tok::html_quoted_string:
833   case tok::html_greater:
834   case tok::html_slash_greater:
835     llvm_unreachable("should not see this token");
836   }
837   llvm_unreachable("bogus token kind");
838 }
839 
840 FullComment *Parser::parseFullComment() {
841   // Skip newlines at the beginning of the comment.
842   while (Tok.is(tok::newline))
843     consumeToken();
844 
845   SmallVector<BlockContentComment *, 8> Blocks;
846   while (Tok.isNot(tok::eof)) {
847     Blocks.push_back(parseBlockContent());
848 
849     // Skip extra newlines after paragraph end.
850     while (Tok.is(tok::newline))
851       consumeToken();
852   }
853   return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
854 }
855 
856 } // end namespace comments
857 } // end namespace clang
858