xref: /openbsd-src/gnu/llvm/clang/lib/AST/CommentParser.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1e5dd7070Spatrick //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick 
9e5dd7070Spatrick #include "clang/AST/CommentParser.h"
10e5dd7070Spatrick #include "clang/AST/CommentCommandTraits.h"
11e5dd7070Spatrick #include "clang/AST/CommentDiagnostic.h"
12e5dd7070Spatrick #include "clang/AST/CommentSema.h"
13e5dd7070Spatrick #include "clang/Basic/CharInfo.h"
14e5dd7070Spatrick #include "clang/Basic/SourceManager.h"
15e5dd7070Spatrick #include "llvm/Support/ErrorHandling.h"
16e5dd7070Spatrick 
17e5dd7070Spatrick namespace clang {
18e5dd7070Spatrick 
isWhitespace(llvm::StringRef S)19e5dd7070Spatrick static inline bool isWhitespace(llvm::StringRef S) {
20e5dd7070Spatrick   for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21e5dd7070Spatrick     if (!isWhitespace(*I))
22e5dd7070Spatrick       return false;
23e5dd7070Spatrick   }
24e5dd7070Spatrick   return true;
25e5dd7070Spatrick }
26e5dd7070Spatrick 
27e5dd7070Spatrick namespace comments {
28e5dd7070Spatrick 
29e5dd7070Spatrick /// Re-lexes a sequence of tok::text tokens.
30e5dd7070Spatrick class TextTokenRetokenizer {
31e5dd7070Spatrick   llvm::BumpPtrAllocator &Allocator;
32e5dd7070Spatrick   Parser &P;
33e5dd7070Spatrick 
34e5dd7070Spatrick   /// This flag is set when there are no more tokens we can fetch from lexer.
35e5dd7070Spatrick   bool NoMoreInterestingTokens;
36e5dd7070Spatrick 
37e5dd7070Spatrick   /// Token buffer: tokens we have processed and lookahead.
38e5dd7070Spatrick   SmallVector<Token, 16> Toks;
39e5dd7070Spatrick 
40e5dd7070Spatrick   /// A position in \c Toks.
41e5dd7070Spatrick   struct Position {
42e5dd7070Spatrick     const char *BufferStart;
43e5dd7070Spatrick     const char *BufferEnd;
44e5dd7070Spatrick     const char *BufferPtr;
45e5dd7070Spatrick     SourceLocation BufferStartLoc;
46e5dd7070Spatrick     unsigned CurToken;
47e5dd7070Spatrick   };
48e5dd7070Spatrick 
49e5dd7070Spatrick   /// Current position in Toks.
50e5dd7070Spatrick   Position Pos;
51e5dd7070Spatrick 
isEnd() const52e5dd7070Spatrick   bool isEnd() const {
53e5dd7070Spatrick     return Pos.CurToken >= Toks.size();
54e5dd7070Spatrick   }
55e5dd7070Spatrick 
56e5dd7070Spatrick   /// Sets up the buffer pointers to point to current token.
setupBuffer()57e5dd7070Spatrick   void setupBuffer() {
58e5dd7070Spatrick     assert(!isEnd());
59e5dd7070Spatrick     const Token &Tok = Toks[Pos.CurToken];
60e5dd7070Spatrick 
61e5dd7070Spatrick     Pos.BufferStart = Tok.getText().begin();
62e5dd7070Spatrick     Pos.BufferEnd = Tok.getText().end();
63e5dd7070Spatrick     Pos.BufferPtr = Pos.BufferStart;
64e5dd7070Spatrick     Pos.BufferStartLoc = Tok.getLocation();
65e5dd7070Spatrick   }
66e5dd7070Spatrick 
getSourceLocation() const67e5dd7070Spatrick   SourceLocation getSourceLocation() const {
68e5dd7070Spatrick     const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69e5dd7070Spatrick     return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70e5dd7070Spatrick   }
71e5dd7070Spatrick 
peek() const72e5dd7070Spatrick   char peek() const {
73e5dd7070Spatrick     assert(!isEnd());
74e5dd7070Spatrick     assert(Pos.BufferPtr != Pos.BufferEnd);
75e5dd7070Spatrick     return *Pos.BufferPtr;
76e5dd7070Spatrick   }
77e5dd7070Spatrick 
consumeChar()78e5dd7070Spatrick   void consumeChar() {
79e5dd7070Spatrick     assert(!isEnd());
80e5dd7070Spatrick     assert(Pos.BufferPtr != Pos.BufferEnd);
81e5dd7070Spatrick     Pos.BufferPtr++;
82e5dd7070Spatrick     if (Pos.BufferPtr == Pos.BufferEnd) {
83e5dd7070Spatrick       Pos.CurToken++;
84e5dd7070Spatrick       if (isEnd() && !addToken())
85e5dd7070Spatrick         return;
86e5dd7070Spatrick 
87e5dd7070Spatrick       assert(!isEnd());
88e5dd7070Spatrick       setupBuffer();
89e5dd7070Spatrick     }
90e5dd7070Spatrick   }
91e5dd7070Spatrick 
92e5dd7070Spatrick   /// Add a token.
93e5dd7070Spatrick   /// Returns true on success, false if there are no interesting tokens to
94e5dd7070Spatrick   /// fetch from lexer.
addToken()95e5dd7070Spatrick   bool addToken() {
96e5dd7070Spatrick     if (NoMoreInterestingTokens)
97e5dd7070Spatrick       return false;
98e5dd7070Spatrick 
99e5dd7070Spatrick     if (P.Tok.is(tok::newline)) {
100e5dd7070Spatrick       // If we see a single newline token between text tokens, skip it.
101e5dd7070Spatrick       Token Newline = P.Tok;
102e5dd7070Spatrick       P.consumeToken();
103e5dd7070Spatrick       if (P.Tok.isNot(tok::text)) {
104e5dd7070Spatrick         P.putBack(Newline);
105e5dd7070Spatrick         NoMoreInterestingTokens = true;
106e5dd7070Spatrick         return false;
107e5dd7070Spatrick       }
108e5dd7070Spatrick     }
109e5dd7070Spatrick     if (P.Tok.isNot(tok::text)) {
110e5dd7070Spatrick       NoMoreInterestingTokens = true;
111e5dd7070Spatrick       return false;
112e5dd7070Spatrick     }
113e5dd7070Spatrick 
114e5dd7070Spatrick     Toks.push_back(P.Tok);
115e5dd7070Spatrick     P.consumeToken();
116e5dd7070Spatrick     if (Toks.size() == 1)
117e5dd7070Spatrick       setupBuffer();
118e5dd7070Spatrick     return true;
119e5dd7070Spatrick   }
120e5dd7070Spatrick 
consumeWhitespace()121e5dd7070Spatrick   void consumeWhitespace() {
122e5dd7070Spatrick     while (!isEnd()) {
123e5dd7070Spatrick       if (isWhitespace(peek()))
124e5dd7070Spatrick         consumeChar();
125e5dd7070Spatrick       else
126e5dd7070Spatrick         break;
127e5dd7070Spatrick     }
128e5dd7070Spatrick   }
129e5dd7070Spatrick 
formTokenWithChars(Token & Result,SourceLocation Loc,const char * TokBegin,unsigned TokLength,StringRef Text)130e5dd7070Spatrick   void formTokenWithChars(Token &Result,
131e5dd7070Spatrick                           SourceLocation Loc,
132e5dd7070Spatrick                           const char *TokBegin,
133e5dd7070Spatrick                           unsigned TokLength,
134e5dd7070Spatrick                           StringRef Text) {
135e5dd7070Spatrick     Result.setLocation(Loc);
136e5dd7070Spatrick     Result.setKind(tok::text);
137e5dd7070Spatrick     Result.setLength(TokLength);
138e5dd7070Spatrick #ifndef NDEBUG
139e5dd7070Spatrick     Result.TextPtr = "<UNSET>";
140e5dd7070Spatrick     Result.IntVal = 7;
141e5dd7070Spatrick #endif
142e5dd7070Spatrick     Result.setText(Text);
143e5dd7070Spatrick   }
144e5dd7070Spatrick 
145e5dd7070Spatrick public:
TextTokenRetokenizer(llvm::BumpPtrAllocator & Allocator,Parser & P)146e5dd7070Spatrick   TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
147e5dd7070Spatrick       Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
148e5dd7070Spatrick     Pos.CurToken = 0;
149e5dd7070Spatrick     addToken();
150e5dd7070Spatrick   }
151e5dd7070Spatrick 
152e5dd7070Spatrick   /// Extract a word -- sequence of non-whitespace characters.
lexWord(Token & Tok)153e5dd7070Spatrick   bool lexWord(Token &Tok) {
154e5dd7070Spatrick     if (isEnd())
155e5dd7070Spatrick       return false;
156e5dd7070Spatrick 
157e5dd7070Spatrick     Position SavedPos = Pos;
158e5dd7070Spatrick 
159e5dd7070Spatrick     consumeWhitespace();
160e5dd7070Spatrick     SmallString<32> WordText;
161e5dd7070Spatrick     const char *WordBegin = Pos.BufferPtr;
162e5dd7070Spatrick     SourceLocation Loc = getSourceLocation();
163e5dd7070Spatrick     while (!isEnd()) {
164e5dd7070Spatrick       const char C = peek();
165e5dd7070Spatrick       if (!isWhitespace(C)) {
166e5dd7070Spatrick         WordText.push_back(C);
167e5dd7070Spatrick         consumeChar();
168e5dd7070Spatrick       } else
169e5dd7070Spatrick         break;
170e5dd7070Spatrick     }
171e5dd7070Spatrick     const unsigned Length = WordText.size();
172e5dd7070Spatrick     if (Length == 0) {
173e5dd7070Spatrick       Pos = SavedPos;
174e5dd7070Spatrick       return false;
175e5dd7070Spatrick     }
176e5dd7070Spatrick 
177e5dd7070Spatrick     char *TextPtr = Allocator.Allocate<char>(Length + 1);
178e5dd7070Spatrick 
179e5dd7070Spatrick     memcpy(TextPtr, WordText.c_str(), Length + 1);
180e5dd7070Spatrick     StringRef Text = StringRef(TextPtr, Length);
181e5dd7070Spatrick 
182e5dd7070Spatrick     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
183e5dd7070Spatrick     return true;
184e5dd7070Spatrick   }
185e5dd7070Spatrick 
lexDelimitedSeq(Token & Tok,char OpenDelim,char CloseDelim)186e5dd7070Spatrick   bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
187e5dd7070Spatrick     if (isEnd())
188e5dd7070Spatrick       return false;
189e5dd7070Spatrick 
190e5dd7070Spatrick     Position SavedPos = Pos;
191e5dd7070Spatrick 
192e5dd7070Spatrick     consumeWhitespace();
193e5dd7070Spatrick     SmallString<32> WordText;
194e5dd7070Spatrick     const char *WordBegin = Pos.BufferPtr;
195e5dd7070Spatrick     SourceLocation Loc = getSourceLocation();
196e5dd7070Spatrick     bool Error = false;
197e5dd7070Spatrick     if (!isEnd()) {
198e5dd7070Spatrick       const char C = peek();
199e5dd7070Spatrick       if (C == OpenDelim) {
200e5dd7070Spatrick         WordText.push_back(C);
201e5dd7070Spatrick         consumeChar();
202e5dd7070Spatrick       } else
203e5dd7070Spatrick         Error = true;
204e5dd7070Spatrick     }
205e5dd7070Spatrick     char C = '\0';
206e5dd7070Spatrick     while (!Error && !isEnd()) {
207e5dd7070Spatrick       C = peek();
208e5dd7070Spatrick       WordText.push_back(C);
209e5dd7070Spatrick       consumeChar();
210e5dd7070Spatrick       if (C == CloseDelim)
211e5dd7070Spatrick         break;
212e5dd7070Spatrick     }
213e5dd7070Spatrick     if (!Error && C != CloseDelim)
214e5dd7070Spatrick       Error = true;
215e5dd7070Spatrick 
216e5dd7070Spatrick     if (Error) {
217e5dd7070Spatrick       Pos = SavedPos;
218e5dd7070Spatrick       return false;
219e5dd7070Spatrick     }
220e5dd7070Spatrick 
221e5dd7070Spatrick     const unsigned Length = WordText.size();
222e5dd7070Spatrick     char *TextPtr = Allocator.Allocate<char>(Length + 1);
223e5dd7070Spatrick 
224e5dd7070Spatrick     memcpy(TextPtr, WordText.c_str(), Length + 1);
225e5dd7070Spatrick     StringRef Text = StringRef(TextPtr, Length);
226e5dd7070Spatrick 
227e5dd7070Spatrick     formTokenWithChars(Tok, Loc, WordBegin,
228e5dd7070Spatrick                        Pos.BufferPtr - WordBegin, Text);
229e5dd7070Spatrick     return true;
230e5dd7070Spatrick   }
231e5dd7070Spatrick 
232e5dd7070Spatrick   /// Put back tokens that we didn't consume.
putBackLeftoverTokens()233e5dd7070Spatrick   void putBackLeftoverTokens() {
234e5dd7070Spatrick     if (isEnd())
235e5dd7070Spatrick       return;
236e5dd7070Spatrick 
237e5dd7070Spatrick     bool HavePartialTok = false;
238e5dd7070Spatrick     Token PartialTok;
239e5dd7070Spatrick     if (Pos.BufferPtr != Pos.BufferStart) {
240e5dd7070Spatrick       formTokenWithChars(PartialTok, getSourceLocation(),
241e5dd7070Spatrick                          Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
242e5dd7070Spatrick                          StringRef(Pos.BufferPtr,
243e5dd7070Spatrick                                    Pos.BufferEnd - Pos.BufferPtr));
244e5dd7070Spatrick       HavePartialTok = true;
245e5dd7070Spatrick       Pos.CurToken++;
246e5dd7070Spatrick     }
247e5dd7070Spatrick 
248*12c85518Srobert     P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
249e5dd7070Spatrick     Pos.CurToken = Toks.size();
250e5dd7070Spatrick 
251e5dd7070Spatrick     if (HavePartialTok)
252e5dd7070Spatrick       P.putBack(PartialTok);
253e5dd7070Spatrick   }
254e5dd7070Spatrick };
255e5dd7070Spatrick 
Parser(Lexer & L,Sema & S,llvm::BumpPtrAllocator & Allocator,const SourceManager & SourceMgr,DiagnosticsEngine & Diags,const CommandTraits & Traits)256e5dd7070Spatrick Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
257e5dd7070Spatrick                const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
258e5dd7070Spatrick                const CommandTraits &Traits):
259e5dd7070Spatrick     L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
260e5dd7070Spatrick     Traits(Traits) {
261e5dd7070Spatrick   consumeToken();
262e5dd7070Spatrick }
263e5dd7070Spatrick 
parseParamCommandArgs(ParamCommandComment * PC,TextTokenRetokenizer & Retokenizer)264e5dd7070Spatrick void Parser::parseParamCommandArgs(ParamCommandComment *PC,
265e5dd7070Spatrick                                    TextTokenRetokenizer &Retokenizer) {
266e5dd7070Spatrick   Token Arg;
267e5dd7070Spatrick   // Check if argument looks like direction specification: [dir]
268e5dd7070Spatrick   // e.g., [in], [out], [in,out]
269e5dd7070Spatrick   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
270e5dd7070Spatrick     S.actOnParamCommandDirectionArg(PC,
271e5dd7070Spatrick                                     Arg.getLocation(),
272e5dd7070Spatrick                                     Arg.getEndLocation(),
273e5dd7070Spatrick                                     Arg.getText());
274e5dd7070Spatrick 
275e5dd7070Spatrick   if (Retokenizer.lexWord(Arg))
276e5dd7070Spatrick     S.actOnParamCommandParamNameArg(PC,
277e5dd7070Spatrick                                     Arg.getLocation(),
278e5dd7070Spatrick                                     Arg.getEndLocation(),
279e5dd7070Spatrick                                     Arg.getText());
280e5dd7070Spatrick }
281e5dd7070Spatrick 
parseTParamCommandArgs(TParamCommandComment * TPC,TextTokenRetokenizer & Retokenizer)282e5dd7070Spatrick void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
283e5dd7070Spatrick                                     TextTokenRetokenizer &Retokenizer) {
284e5dd7070Spatrick   Token Arg;
285e5dd7070Spatrick   if (Retokenizer.lexWord(Arg))
286e5dd7070Spatrick     S.actOnTParamCommandParamNameArg(TPC,
287e5dd7070Spatrick                                      Arg.getLocation(),
288e5dd7070Spatrick                                      Arg.getEndLocation(),
289e5dd7070Spatrick                                      Arg.getText());
290e5dd7070Spatrick }
291e5dd7070Spatrick 
292*12c85518Srobert ArrayRef<Comment::Argument>
parseCommandArgs(TextTokenRetokenizer & Retokenizer,unsigned NumArgs)293*12c85518Srobert Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
294*12c85518Srobert   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
295*12c85518Srobert       Comment::Argument[NumArgs];
296e5dd7070Spatrick   unsigned ParsedArgs = 0;
297e5dd7070Spatrick   Token Arg;
298e5dd7070Spatrick   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
299*12c85518Srobert     Args[ParsedArgs] = Comment::Argument{
300*12c85518Srobert         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
301e5dd7070Spatrick     ParsedArgs++;
302e5dd7070Spatrick   }
303e5dd7070Spatrick 
304*12c85518Srobert   return llvm::ArrayRef(Args, ParsedArgs);
305e5dd7070Spatrick }
306e5dd7070Spatrick 
parseBlockCommand()307e5dd7070Spatrick BlockCommandComment *Parser::parseBlockCommand() {
308e5dd7070Spatrick   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
309e5dd7070Spatrick 
310e5dd7070Spatrick   ParamCommandComment *PC = nullptr;
311e5dd7070Spatrick   TParamCommandComment *TPC = nullptr;
312e5dd7070Spatrick   BlockCommandComment *BC = nullptr;
313e5dd7070Spatrick   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
314e5dd7070Spatrick   CommandMarkerKind CommandMarker =
315e5dd7070Spatrick       Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
316e5dd7070Spatrick   if (Info->IsParamCommand) {
317e5dd7070Spatrick     PC = S.actOnParamCommandStart(Tok.getLocation(),
318e5dd7070Spatrick                                   Tok.getEndLocation(),
319e5dd7070Spatrick                                   Tok.getCommandID(),
320e5dd7070Spatrick                                   CommandMarker);
321e5dd7070Spatrick   } else if (Info->IsTParamCommand) {
322e5dd7070Spatrick     TPC = S.actOnTParamCommandStart(Tok.getLocation(),
323e5dd7070Spatrick                                     Tok.getEndLocation(),
324e5dd7070Spatrick                                     Tok.getCommandID(),
325e5dd7070Spatrick                                     CommandMarker);
326e5dd7070Spatrick   } else {
327e5dd7070Spatrick     BC = S.actOnBlockCommandStart(Tok.getLocation(),
328e5dd7070Spatrick                                   Tok.getEndLocation(),
329e5dd7070Spatrick                                   Tok.getCommandID(),
330e5dd7070Spatrick                                   CommandMarker);
331e5dd7070Spatrick   }
332e5dd7070Spatrick   consumeToken();
333e5dd7070Spatrick 
334e5dd7070Spatrick   if (isTokBlockCommand()) {
335e5dd7070Spatrick     // Block command ahead.  We can't nest block commands, so pretend that this
336e5dd7070Spatrick     // command has an empty argument.
337*12c85518Srobert     ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt);
338e5dd7070Spatrick     if (PC) {
339e5dd7070Spatrick       S.actOnParamCommandFinish(PC, Paragraph);
340e5dd7070Spatrick       return PC;
341e5dd7070Spatrick     } else if (TPC) {
342e5dd7070Spatrick       S.actOnTParamCommandFinish(TPC, Paragraph);
343e5dd7070Spatrick       return TPC;
344e5dd7070Spatrick     } else {
345e5dd7070Spatrick       S.actOnBlockCommandFinish(BC, Paragraph);
346e5dd7070Spatrick       return BC;
347e5dd7070Spatrick     }
348e5dd7070Spatrick   }
349e5dd7070Spatrick 
350e5dd7070Spatrick   if (PC || TPC || Info->NumArgs > 0) {
351e5dd7070Spatrick     // In order to parse command arguments we need to retokenize a few
352e5dd7070Spatrick     // following text tokens.
353e5dd7070Spatrick     TextTokenRetokenizer Retokenizer(Allocator, *this);
354e5dd7070Spatrick 
355e5dd7070Spatrick     if (PC)
356e5dd7070Spatrick       parseParamCommandArgs(PC, Retokenizer);
357e5dd7070Spatrick     else if (TPC)
358e5dd7070Spatrick       parseTParamCommandArgs(TPC, Retokenizer);
359e5dd7070Spatrick     else
360*12c85518Srobert       S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
361e5dd7070Spatrick 
362e5dd7070Spatrick     Retokenizer.putBackLeftoverTokens();
363e5dd7070Spatrick   }
364e5dd7070Spatrick 
365e5dd7070Spatrick   // If there's a block command ahead, we will attach an empty paragraph to
366e5dd7070Spatrick   // this command.
367e5dd7070Spatrick   bool EmptyParagraph = false;
368e5dd7070Spatrick   if (isTokBlockCommand())
369e5dd7070Spatrick     EmptyParagraph = true;
370e5dd7070Spatrick   else if (Tok.is(tok::newline)) {
371e5dd7070Spatrick     Token PrevTok = Tok;
372e5dd7070Spatrick     consumeToken();
373e5dd7070Spatrick     EmptyParagraph = isTokBlockCommand();
374e5dd7070Spatrick     putBack(PrevTok);
375e5dd7070Spatrick   }
376e5dd7070Spatrick 
377e5dd7070Spatrick   ParagraphComment *Paragraph;
378e5dd7070Spatrick   if (EmptyParagraph)
379*12c85518Srobert     Paragraph = S.actOnParagraphComment(std::nullopt);
380e5dd7070Spatrick   else {
381e5dd7070Spatrick     BlockContentComment *Block = parseParagraphOrBlockCommand();
382e5dd7070Spatrick     // Since we have checked for a block command, we should have parsed a
383e5dd7070Spatrick     // paragraph.
384e5dd7070Spatrick     Paragraph = cast<ParagraphComment>(Block);
385e5dd7070Spatrick   }
386e5dd7070Spatrick 
387e5dd7070Spatrick   if (PC) {
388e5dd7070Spatrick     S.actOnParamCommandFinish(PC, Paragraph);
389e5dd7070Spatrick     return PC;
390e5dd7070Spatrick   } else if (TPC) {
391e5dd7070Spatrick     S.actOnTParamCommandFinish(TPC, Paragraph);
392e5dd7070Spatrick     return TPC;
393e5dd7070Spatrick   } else {
394e5dd7070Spatrick     S.actOnBlockCommandFinish(BC, Paragraph);
395e5dd7070Spatrick     return BC;
396e5dd7070Spatrick   }
397e5dd7070Spatrick }
398e5dd7070Spatrick 
parseInlineCommand()399e5dd7070Spatrick InlineCommandComment *Parser::parseInlineCommand() {
400e5dd7070Spatrick   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
401*12c85518Srobert   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
402e5dd7070Spatrick 
403e5dd7070Spatrick   const Token CommandTok = Tok;
404e5dd7070Spatrick   consumeToken();
405e5dd7070Spatrick 
406e5dd7070Spatrick   TextTokenRetokenizer Retokenizer(Allocator, *this);
407*12c85518Srobert   ArrayRef<Comment::Argument> Args =
408*12c85518Srobert       parseCommandArgs(Retokenizer, Info->NumArgs);
409e5dd7070Spatrick 
410*12c85518Srobert   InlineCommandComment *IC = S.actOnInlineCommand(
411*12c85518Srobert       CommandTok.getLocation(), CommandTok.getEndLocation(),
412*12c85518Srobert       CommandTok.getCommandID(), Args);
413e5dd7070Spatrick 
414*12c85518Srobert   if (Args.size() < Info->NumArgs) {
415e5dd7070Spatrick     Diag(CommandTok.getEndLocation().getLocWithOffset(1),
416*12c85518Srobert          diag::warn_doc_inline_command_not_enough_arguments)
417*12c85518Srobert         << CommandTok.is(tok::at_command) << Info->Name << Args.size()
418*12c85518Srobert         << Info->NumArgs
419e5dd7070Spatrick         << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
420e5dd7070Spatrick   }
421e5dd7070Spatrick 
422e5dd7070Spatrick   Retokenizer.putBackLeftoverTokens();
423e5dd7070Spatrick 
424e5dd7070Spatrick   return IC;
425e5dd7070Spatrick }
426e5dd7070Spatrick 
parseHTMLStartTag()427e5dd7070Spatrick HTMLStartTagComment *Parser::parseHTMLStartTag() {
428e5dd7070Spatrick   assert(Tok.is(tok::html_start_tag));
429e5dd7070Spatrick   HTMLStartTagComment *HST =
430e5dd7070Spatrick       S.actOnHTMLStartTagStart(Tok.getLocation(),
431e5dd7070Spatrick                                Tok.getHTMLTagStartName());
432e5dd7070Spatrick   consumeToken();
433e5dd7070Spatrick 
434e5dd7070Spatrick   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
435e5dd7070Spatrick   while (true) {
436e5dd7070Spatrick     switch (Tok.getKind()) {
437e5dd7070Spatrick     case tok::html_ident: {
438e5dd7070Spatrick       Token Ident = Tok;
439e5dd7070Spatrick       consumeToken();
440e5dd7070Spatrick       if (Tok.isNot(tok::html_equals)) {
441e5dd7070Spatrick         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
442e5dd7070Spatrick                                                        Ident.getHTMLIdent()));
443e5dd7070Spatrick         continue;
444e5dd7070Spatrick       }
445e5dd7070Spatrick       Token Equals = Tok;
446e5dd7070Spatrick       consumeToken();
447e5dd7070Spatrick       if (Tok.isNot(tok::html_quoted_string)) {
448e5dd7070Spatrick         Diag(Tok.getLocation(),
449e5dd7070Spatrick              diag::warn_doc_html_start_tag_expected_quoted_string)
450e5dd7070Spatrick           << SourceRange(Equals.getLocation());
451e5dd7070Spatrick         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
452e5dd7070Spatrick                                                        Ident.getHTMLIdent()));
453e5dd7070Spatrick         while (Tok.is(tok::html_equals) ||
454e5dd7070Spatrick                Tok.is(tok::html_quoted_string))
455e5dd7070Spatrick           consumeToken();
456e5dd7070Spatrick         continue;
457e5dd7070Spatrick       }
458e5dd7070Spatrick       Attrs.push_back(HTMLStartTagComment::Attribute(
459e5dd7070Spatrick                               Ident.getLocation(),
460e5dd7070Spatrick                               Ident.getHTMLIdent(),
461e5dd7070Spatrick                               Equals.getLocation(),
462e5dd7070Spatrick                               SourceRange(Tok.getLocation(),
463e5dd7070Spatrick                                           Tok.getEndLocation()),
464e5dd7070Spatrick                               Tok.getHTMLQuotedString()));
465e5dd7070Spatrick       consumeToken();
466e5dd7070Spatrick       continue;
467e5dd7070Spatrick     }
468e5dd7070Spatrick 
469e5dd7070Spatrick     case tok::html_greater:
470*12c85518Srobert       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
471e5dd7070Spatrick                                 Tok.getLocation(),
472e5dd7070Spatrick                                 /* IsSelfClosing = */ false);
473e5dd7070Spatrick       consumeToken();
474e5dd7070Spatrick       return HST;
475e5dd7070Spatrick 
476e5dd7070Spatrick     case tok::html_slash_greater:
477*12c85518Srobert       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
478e5dd7070Spatrick                                 Tok.getLocation(),
479e5dd7070Spatrick                                 /* IsSelfClosing = */ true);
480e5dd7070Spatrick       consumeToken();
481e5dd7070Spatrick       return HST;
482e5dd7070Spatrick 
483e5dd7070Spatrick     case tok::html_equals:
484e5dd7070Spatrick     case tok::html_quoted_string:
485e5dd7070Spatrick       Diag(Tok.getLocation(),
486e5dd7070Spatrick            diag::warn_doc_html_start_tag_expected_ident_or_greater);
487e5dd7070Spatrick       while (Tok.is(tok::html_equals) ||
488e5dd7070Spatrick              Tok.is(tok::html_quoted_string))
489e5dd7070Spatrick         consumeToken();
490e5dd7070Spatrick       if (Tok.is(tok::html_ident) ||
491e5dd7070Spatrick           Tok.is(tok::html_greater) ||
492e5dd7070Spatrick           Tok.is(tok::html_slash_greater))
493e5dd7070Spatrick         continue;
494e5dd7070Spatrick 
495*12c85518Srobert       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
496e5dd7070Spatrick                                 SourceLocation(),
497e5dd7070Spatrick                                 /* IsSelfClosing = */ false);
498e5dd7070Spatrick       return HST;
499e5dd7070Spatrick 
500e5dd7070Spatrick     default:
501e5dd7070Spatrick       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
502*12c85518Srobert       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)),
503e5dd7070Spatrick                                 SourceLocation(),
504e5dd7070Spatrick                                 /* IsSelfClosing = */ false);
505e5dd7070Spatrick       bool StartLineInvalid;
506e5dd7070Spatrick       const unsigned StartLine = SourceMgr.getPresumedLineNumber(
507e5dd7070Spatrick                                                   HST->getLocation(),
508e5dd7070Spatrick                                                   &StartLineInvalid);
509e5dd7070Spatrick       bool EndLineInvalid;
510e5dd7070Spatrick       const unsigned EndLine = SourceMgr.getPresumedLineNumber(
511e5dd7070Spatrick                                                   Tok.getLocation(),
512e5dd7070Spatrick                                                   &EndLineInvalid);
513e5dd7070Spatrick       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
514e5dd7070Spatrick         Diag(Tok.getLocation(),
515e5dd7070Spatrick              diag::warn_doc_html_start_tag_expected_ident_or_greater)
516e5dd7070Spatrick           << HST->getSourceRange();
517e5dd7070Spatrick       else {
518e5dd7070Spatrick         Diag(Tok.getLocation(),
519e5dd7070Spatrick              diag::warn_doc_html_start_tag_expected_ident_or_greater);
520e5dd7070Spatrick         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
521e5dd7070Spatrick           << HST->getSourceRange();
522e5dd7070Spatrick       }
523e5dd7070Spatrick       return HST;
524e5dd7070Spatrick     }
525e5dd7070Spatrick   }
526e5dd7070Spatrick }
527e5dd7070Spatrick 
parseHTMLEndTag()528e5dd7070Spatrick HTMLEndTagComment *Parser::parseHTMLEndTag() {
529e5dd7070Spatrick   assert(Tok.is(tok::html_end_tag));
530e5dd7070Spatrick   Token TokEndTag = Tok;
531e5dd7070Spatrick   consumeToken();
532e5dd7070Spatrick   SourceLocation Loc;
533e5dd7070Spatrick   if (Tok.is(tok::html_greater)) {
534e5dd7070Spatrick     Loc = Tok.getLocation();
535e5dd7070Spatrick     consumeToken();
536e5dd7070Spatrick   }
537e5dd7070Spatrick 
538e5dd7070Spatrick   return S.actOnHTMLEndTag(TokEndTag.getLocation(),
539e5dd7070Spatrick                            Loc,
540e5dd7070Spatrick                            TokEndTag.getHTMLTagEndName());
541e5dd7070Spatrick }
542e5dd7070Spatrick 
parseParagraphOrBlockCommand()543e5dd7070Spatrick BlockContentComment *Parser::parseParagraphOrBlockCommand() {
544e5dd7070Spatrick   SmallVector<InlineContentComment *, 8> Content;
545e5dd7070Spatrick 
546e5dd7070Spatrick   while (true) {
547e5dd7070Spatrick     switch (Tok.getKind()) {
548e5dd7070Spatrick     case tok::verbatim_block_begin:
549e5dd7070Spatrick     case tok::verbatim_line_name:
550e5dd7070Spatrick     case tok::eof:
551e5dd7070Spatrick       break; // Block content or EOF ahead, finish this parapgaph.
552e5dd7070Spatrick 
553e5dd7070Spatrick     case tok::unknown_command:
554e5dd7070Spatrick       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
555e5dd7070Spatrick                                               Tok.getEndLocation(),
556e5dd7070Spatrick                                               Tok.getUnknownCommandName()));
557e5dd7070Spatrick       consumeToken();
558e5dd7070Spatrick       continue;
559e5dd7070Spatrick 
560e5dd7070Spatrick     case tok::backslash_command:
561e5dd7070Spatrick     case tok::at_command: {
562e5dd7070Spatrick       const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
563e5dd7070Spatrick       if (Info->IsBlockCommand) {
564e5dd7070Spatrick         if (Content.size() == 0)
565e5dd7070Spatrick           return parseBlockCommand();
566e5dd7070Spatrick         break; // Block command ahead, finish this parapgaph.
567e5dd7070Spatrick       }
568e5dd7070Spatrick       if (Info->IsVerbatimBlockEndCommand) {
569e5dd7070Spatrick         Diag(Tok.getLocation(),
570e5dd7070Spatrick              diag::warn_verbatim_block_end_without_start)
571e5dd7070Spatrick           << Tok.is(tok::at_command)
572e5dd7070Spatrick           << Info->Name
573e5dd7070Spatrick           << SourceRange(Tok.getLocation(), Tok.getEndLocation());
574e5dd7070Spatrick         consumeToken();
575e5dd7070Spatrick         continue;
576e5dd7070Spatrick       }
577e5dd7070Spatrick       if (Info->IsUnknownCommand) {
578e5dd7070Spatrick         Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
579e5dd7070Spatrick                                                 Tok.getEndLocation(),
580e5dd7070Spatrick                                                 Info->getID()));
581e5dd7070Spatrick         consumeToken();
582e5dd7070Spatrick         continue;
583e5dd7070Spatrick       }
584e5dd7070Spatrick       assert(Info->IsInlineCommand);
585e5dd7070Spatrick       Content.push_back(parseInlineCommand());
586e5dd7070Spatrick       continue;
587e5dd7070Spatrick     }
588e5dd7070Spatrick 
589e5dd7070Spatrick     case tok::newline: {
590e5dd7070Spatrick       consumeToken();
591e5dd7070Spatrick       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
592e5dd7070Spatrick         consumeToken();
593e5dd7070Spatrick         break; // Two newlines -- end of paragraph.
594e5dd7070Spatrick       }
595e5dd7070Spatrick       // Also allow [tok::newline, tok::text, tok::newline] if the middle
596e5dd7070Spatrick       // tok::text is just whitespace.
597e5dd7070Spatrick       if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
598e5dd7070Spatrick         Token WhitespaceTok = Tok;
599e5dd7070Spatrick         consumeToken();
600e5dd7070Spatrick         if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
601e5dd7070Spatrick           consumeToken();
602e5dd7070Spatrick           break;
603e5dd7070Spatrick         }
604e5dd7070Spatrick         // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
605e5dd7070Spatrick         putBack(WhitespaceTok);
606e5dd7070Spatrick       }
607e5dd7070Spatrick       if (Content.size() > 0)
608e5dd7070Spatrick         Content.back()->addTrailingNewline();
609e5dd7070Spatrick       continue;
610e5dd7070Spatrick     }
611e5dd7070Spatrick 
612e5dd7070Spatrick     // Don't deal with HTML tag soup now.
613e5dd7070Spatrick     case tok::html_start_tag:
614e5dd7070Spatrick       Content.push_back(parseHTMLStartTag());
615e5dd7070Spatrick       continue;
616e5dd7070Spatrick 
617e5dd7070Spatrick     case tok::html_end_tag:
618e5dd7070Spatrick       Content.push_back(parseHTMLEndTag());
619e5dd7070Spatrick       continue;
620e5dd7070Spatrick 
621e5dd7070Spatrick     case tok::text:
622e5dd7070Spatrick       Content.push_back(S.actOnText(Tok.getLocation(),
623e5dd7070Spatrick                                     Tok.getEndLocation(),
624e5dd7070Spatrick                                     Tok.getText()));
625e5dd7070Spatrick       consumeToken();
626e5dd7070Spatrick       continue;
627e5dd7070Spatrick 
628e5dd7070Spatrick     case tok::verbatim_block_line:
629e5dd7070Spatrick     case tok::verbatim_block_end:
630e5dd7070Spatrick     case tok::verbatim_line_text:
631e5dd7070Spatrick     case tok::html_ident:
632e5dd7070Spatrick     case tok::html_equals:
633e5dd7070Spatrick     case tok::html_quoted_string:
634e5dd7070Spatrick     case tok::html_greater:
635e5dd7070Spatrick     case tok::html_slash_greater:
636e5dd7070Spatrick       llvm_unreachable("should not see this token");
637e5dd7070Spatrick     }
638e5dd7070Spatrick     break;
639e5dd7070Spatrick   }
640e5dd7070Spatrick 
641*12c85518Srobert   return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content)));
642e5dd7070Spatrick }
643e5dd7070Spatrick 
parseVerbatimBlock()644e5dd7070Spatrick VerbatimBlockComment *Parser::parseVerbatimBlock() {
645e5dd7070Spatrick   assert(Tok.is(tok::verbatim_block_begin));
646e5dd7070Spatrick 
647e5dd7070Spatrick   VerbatimBlockComment *VB =
648e5dd7070Spatrick       S.actOnVerbatimBlockStart(Tok.getLocation(),
649e5dd7070Spatrick                                 Tok.getVerbatimBlockID());
650e5dd7070Spatrick   consumeToken();
651e5dd7070Spatrick 
652e5dd7070Spatrick   // Don't create an empty line if verbatim opening command is followed
653e5dd7070Spatrick   // by a newline.
654e5dd7070Spatrick   if (Tok.is(tok::newline))
655e5dd7070Spatrick     consumeToken();
656e5dd7070Spatrick 
657e5dd7070Spatrick   SmallVector<VerbatimBlockLineComment *, 8> Lines;
658e5dd7070Spatrick   while (Tok.is(tok::verbatim_block_line) ||
659e5dd7070Spatrick          Tok.is(tok::newline)) {
660e5dd7070Spatrick     VerbatimBlockLineComment *Line;
661e5dd7070Spatrick     if (Tok.is(tok::verbatim_block_line)) {
662e5dd7070Spatrick       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
663e5dd7070Spatrick                                       Tok.getVerbatimBlockText());
664e5dd7070Spatrick       consumeToken();
665e5dd7070Spatrick       if (Tok.is(tok::newline)) {
666e5dd7070Spatrick         consumeToken();
667e5dd7070Spatrick       }
668e5dd7070Spatrick     } else {
669e5dd7070Spatrick       // Empty line, just a tok::newline.
670e5dd7070Spatrick       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
671e5dd7070Spatrick       consumeToken();
672e5dd7070Spatrick     }
673e5dd7070Spatrick     Lines.push_back(Line);
674e5dd7070Spatrick   }
675e5dd7070Spatrick 
676e5dd7070Spatrick   if (Tok.is(tok::verbatim_block_end)) {
677e5dd7070Spatrick     const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
678*12c85518Srobert     S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name,
679*12c85518Srobert                                S.copyArray(llvm::ArrayRef(Lines)));
680e5dd7070Spatrick     consumeToken();
681e5dd7070Spatrick   } else {
682e5dd7070Spatrick     // Unterminated \\verbatim block
683e5dd7070Spatrick     S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
684*12c85518Srobert                                S.copyArray(llvm::ArrayRef(Lines)));
685e5dd7070Spatrick   }
686e5dd7070Spatrick 
687e5dd7070Spatrick   return VB;
688e5dd7070Spatrick }
689e5dd7070Spatrick 
parseVerbatimLine()690e5dd7070Spatrick VerbatimLineComment *Parser::parseVerbatimLine() {
691e5dd7070Spatrick   assert(Tok.is(tok::verbatim_line_name));
692e5dd7070Spatrick 
693e5dd7070Spatrick   Token NameTok = Tok;
694e5dd7070Spatrick   consumeToken();
695e5dd7070Spatrick 
696e5dd7070Spatrick   SourceLocation TextBegin;
697e5dd7070Spatrick   StringRef Text;
698e5dd7070Spatrick   // Next token might not be a tok::verbatim_line_text if verbatim line
699e5dd7070Spatrick   // starting command comes just before a newline or comment end.
700e5dd7070Spatrick   if (Tok.is(tok::verbatim_line_text)) {
701e5dd7070Spatrick     TextBegin = Tok.getLocation();
702e5dd7070Spatrick     Text = Tok.getVerbatimLineText();
703e5dd7070Spatrick   } else {
704e5dd7070Spatrick     TextBegin = NameTok.getEndLocation();
705e5dd7070Spatrick     Text = "";
706e5dd7070Spatrick   }
707e5dd7070Spatrick 
708e5dd7070Spatrick   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
709e5dd7070Spatrick                                                 NameTok.getVerbatimLineID(),
710e5dd7070Spatrick                                                 TextBegin,
711e5dd7070Spatrick                                                 Text);
712e5dd7070Spatrick   consumeToken();
713e5dd7070Spatrick   return VL;
714e5dd7070Spatrick }
715e5dd7070Spatrick 
parseBlockContent()716e5dd7070Spatrick BlockContentComment *Parser::parseBlockContent() {
717e5dd7070Spatrick   switch (Tok.getKind()) {
718e5dd7070Spatrick   case tok::text:
719e5dd7070Spatrick   case tok::unknown_command:
720e5dd7070Spatrick   case tok::backslash_command:
721e5dd7070Spatrick   case tok::at_command:
722e5dd7070Spatrick   case tok::html_start_tag:
723e5dd7070Spatrick   case tok::html_end_tag:
724e5dd7070Spatrick     return parseParagraphOrBlockCommand();
725e5dd7070Spatrick 
726e5dd7070Spatrick   case tok::verbatim_block_begin:
727e5dd7070Spatrick     return parseVerbatimBlock();
728e5dd7070Spatrick 
729e5dd7070Spatrick   case tok::verbatim_line_name:
730e5dd7070Spatrick     return parseVerbatimLine();
731e5dd7070Spatrick 
732e5dd7070Spatrick   case tok::eof:
733e5dd7070Spatrick   case tok::newline:
734e5dd7070Spatrick   case tok::verbatim_block_line:
735e5dd7070Spatrick   case tok::verbatim_block_end:
736e5dd7070Spatrick   case tok::verbatim_line_text:
737e5dd7070Spatrick   case tok::html_ident:
738e5dd7070Spatrick   case tok::html_equals:
739e5dd7070Spatrick   case tok::html_quoted_string:
740e5dd7070Spatrick   case tok::html_greater:
741e5dd7070Spatrick   case tok::html_slash_greater:
742e5dd7070Spatrick     llvm_unreachable("should not see this token");
743e5dd7070Spatrick   }
744e5dd7070Spatrick   llvm_unreachable("bogus token kind");
745e5dd7070Spatrick }
746e5dd7070Spatrick 
parseFullComment()747e5dd7070Spatrick FullComment *Parser::parseFullComment() {
748e5dd7070Spatrick   // Skip newlines at the beginning of the comment.
749e5dd7070Spatrick   while (Tok.is(tok::newline))
750e5dd7070Spatrick     consumeToken();
751e5dd7070Spatrick 
752e5dd7070Spatrick   SmallVector<BlockContentComment *, 8> Blocks;
753e5dd7070Spatrick   while (Tok.isNot(tok::eof)) {
754e5dd7070Spatrick     Blocks.push_back(parseBlockContent());
755e5dd7070Spatrick 
756e5dd7070Spatrick     // Skip extra newlines after paragraph end.
757e5dd7070Spatrick     while (Tok.is(tok::newline))
758e5dd7070Spatrick       consumeToken();
759e5dd7070Spatrick   }
760*12c85518Srobert   return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks)));
761e5dd7070Spatrick }
762e5dd7070Spatrick 
763e5dd7070Spatrick } // end namespace comments
764e5dd7070Spatrick } // end namespace clang
765