1*7330f729Sjoerg //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2*7330f729Sjoerg //
3*7330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*7330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
5*7330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*7330f729Sjoerg //
7*7330f729Sjoerg //===----------------------------------------------------------------------===//
8*7330f729Sjoerg
9*7330f729Sjoerg #include "clang/AST/CommentParser.h"
10*7330f729Sjoerg #include "clang/AST/CommentCommandTraits.h"
11*7330f729Sjoerg #include "clang/AST/CommentDiagnostic.h"
12*7330f729Sjoerg #include "clang/AST/CommentSema.h"
13*7330f729Sjoerg #include "clang/Basic/CharInfo.h"
14*7330f729Sjoerg #include "clang/Basic/SourceManager.h"
15*7330f729Sjoerg #include "llvm/Support/ErrorHandling.h"
16*7330f729Sjoerg
17*7330f729Sjoerg namespace clang {
18*7330f729Sjoerg
isWhitespace(llvm::StringRef S)19*7330f729Sjoerg static inline bool isWhitespace(llvm::StringRef S) {
20*7330f729Sjoerg for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
21*7330f729Sjoerg if (!isWhitespace(*I))
22*7330f729Sjoerg return false;
23*7330f729Sjoerg }
24*7330f729Sjoerg return true;
25*7330f729Sjoerg }
26*7330f729Sjoerg
27*7330f729Sjoerg namespace comments {
28*7330f729Sjoerg
29*7330f729Sjoerg /// Re-lexes a sequence of tok::text tokens.
30*7330f729Sjoerg class TextTokenRetokenizer {
31*7330f729Sjoerg llvm::BumpPtrAllocator &Allocator;
32*7330f729Sjoerg Parser &P;
33*7330f729Sjoerg
34*7330f729Sjoerg /// This flag is set when there are no more tokens we can fetch from lexer.
35*7330f729Sjoerg bool NoMoreInterestingTokens;
36*7330f729Sjoerg
37*7330f729Sjoerg /// Token buffer: tokens we have processed and lookahead.
38*7330f729Sjoerg SmallVector<Token, 16> Toks;
39*7330f729Sjoerg
40*7330f729Sjoerg /// A position in \c Toks.
41*7330f729Sjoerg struct Position {
42*7330f729Sjoerg const char *BufferStart;
43*7330f729Sjoerg const char *BufferEnd;
44*7330f729Sjoerg const char *BufferPtr;
45*7330f729Sjoerg SourceLocation BufferStartLoc;
46*7330f729Sjoerg unsigned CurToken;
47*7330f729Sjoerg };
48*7330f729Sjoerg
49*7330f729Sjoerg /// Current position in Toks.
50*7330f729Sjoerg Position Pos;
51*7330f729Sjoerg
isEnd() const52*7330f729Sjoerg bool isEnd() const {
53*7330f729Sjoerg return Pos.CurToken >= Toks.size();
54*7330f729Sjoerg }
55*7330f729Sjoerg
56*7330f729Sjoerg /// Sets up the buffer pointers to point to current token.
setupBuffer()57*7330f729Sjoerg void setupBuffer() {
58*7330f729Sjoerg assert(!isEnd());
59*7330f729Sjoerg const Token &Tok = Toks[Pos.CurToken];
60*7330f729Sjoerg
61*7330f729Sjoerg Pos.BufferStart = Tok.getText().begin();
62*7330f729Sjoerg Pos.BufferEnd = Tok.getText().end();
63*7330f729Sjoerg Pos.BufferPtr = Pos.BufferStart;
64*7330f729Sjoerg Pos.BufferStartLoc = Tok.getLocation();
65*7330f729Sjoerg }
66*7330f729Sjoerg
getSourceLocation() const67*7330f729Sjoerg SourceLocation getSourceLocation() const {
68*7330f729Sjoerg const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
69*7330f729Sjoerg return Pos.BufferStartLoc.getLocWithOffset(CharNo);
70*7330f729Sjoerg }
71*7330f729Sjoerg
peek() const72*7330f729Sjoerg char peek() const {
73*7330f729Sjoerg assert(!isEnd());
74*7330f729Sjoerg assert(Pos.BufferPtr != Pos.BufferEnd);
75*7330f729Sjoerg return *Pos.BufferPtr;
76*7330f729Sjoerg }
77*7330f729Sjoerg
consumeChar()78*7330f729Sjoerg void consumeChar() {
79*7330f729Sjoerg assert(!isEnd());
80*7330f729Sjoerg assert(Pos.BufferPtr != Pos.BufferEnd);
81*7330f729Sjoerg Pos.BufferPtr++;
82*7330f729Sjoerg if (Pos.BufferPtr == Pos.BufferEnd) {
83*7330f729Sjoerg Pos.CurToken++;
84*7330f729Sjoerg if (isEnd() && !addToken())
85*7330f729Sjoerg return;
86*7330f729Sjoerg
87*7330f729Sjoerg assert(!isEnd());
88*7330f729Sjoerg setupBuffer();
89*7330f729Sjoerg }
90*7330f729Sjoerg }
91*7330f729Sjoerg
92*7330f729Sjoerg /// Add a token.
93*7330f729Sjoerg /// Returns true on success, false if there are no interesting tokens to
94*7330f729Sjoerg /// fetch from lexer.
addToken()95*7330f729Sjoerg bool addToken() {
96*7330f729Sjoerg if (NoMoreInterestingTokens)
97*7330f729Sjoerg return false;
98*7330f729Sjoerg
99*7330f729Sjoerg if (P.Tok.is(tok::newline)) {
100*7330f729Sjoerg // If we see a single newline token between text tokens, skip it.
101*7330f729Sjoerg Token Newline = P.Tok;
102*7330f729Sjoerg P.consumeToken();
103*7330f729Sjoerg if (P.Tok.isNot(tok::text)) {
104*7330f729Sjoerg P.putBack(Newline);
105*7330f729Sjoerg NoMoreInterestingTokens = true;
106*7330f729Sjoerg return false;
107*7330f729Sjoerg }
108*7330f729Sjoerg }
109*7330f729Sjoerg if (P.Tok.isNot(tok::text)) {
110*7330f729Sjoerg NoMoreInterestingTokens = true;
111*7330f729Sjoerg return false;
112*7330f729Sjoerg }
113*7330f729Sjoerg
114*7330f729Sjoerg Toks.push_back(P.Tok);
115*7330f729Sjoerg P.consumeToken();
116*7330f729Sjoerg if (Toks.size() == 1)
117*7330f729Sjoerg setupBuffer();
118*7330f729Sjoerg return true;
119*7330f729Sjoerg }
120*7330f729Sjoerg
consumeWhitespace()121*7330f729Sjoerg void consumeWhitespace() {
122*7330f729Sjoerg while (!isEnd()) {
123*7330f729Sjoerg if (isWhitespace(peek()))
124*7330f729Sjoerg consumeChar();
125*7330f729Sjoerg else
126*7330f729Sjoerg break;
127*7330f729Sjoerg }
128*7330f729Sjoerg }
129*7330f729Sjoerg
formTokenWithChars(Token & Result,SourceLocation Loc,const char * TokBegin,unsigned TokLength,StringRef Text)130*7330f729Sjoerg void formTokenWithChars(Token &Result,
131*7330f729Sjoerg SourceLocation Loc,
132*7330f729Sjoerg const char *TokBegin,
133*7330f729Sjoerg unsigned TokLength,
134*7330f729Sjoerg StringRef Text) {
135*7330f729Sjoerg Result.setLocation(Loc);
136*7330f729Sjoerg Result.setKind(tok::text);
137*7330f729Sjoerg Result.setLength(TokLength);
138*7330f729Sjoerg #ifndef NDEBUG
139*7330f729Sjoerg Result.TextPtr = "<UNSET>";
140*7330f729Sjoerg Result.IntVal = 7;
141*7330f729Sjoerg #endif
142*7330f729Sjoerg Result.setText(Text);
143*7330f729Sjoerg }
144*7330f729Sjoerg
145*7330f729Sjoerg public:
TextTokenRetokenizer(llvm::BumpPtrAllocator & Allocator,Parser & P)146*7330f729Sjoerg TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
147*7330f729Sjoerg Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
148*7330f729Sjoerg Pos.CurToken = 0;
149*7330f729Sjoerg addToken();
150*7330f729Sjoerg }
151*7330f729Sjoerg
152*7330f729Sjoerg /// Extract a word -- sequence of non-whitespace characters.
lexWord(Token & Tok)153*7330f729Sjoerg bool lexWord(Token &Tok) {
154*7330f729Sjoerg if (isEnd())
155*7330f729Sjoerg return false;
156*7330f729Sjoerg
157*7330f729Sjoerg Position SavedPos = Pos;
158*7330f729Sjoerg
159*7330f729Sjoerg consumeWhitespace();
160*7330f729Sjoerg SmallString<32> WordText;
161*7330f729Sjoerg const char *WordBegin = Pos.BufferPtr;
162*7330f729Sjoerg SourceLocation Loc = getSourceLocation();
163*7330f729Sjoerg while (!isEnd()) {
164*7330f729Sjoerg const char C = peek();
165*7330f729Sjoerg if (!isWhitespace(C)) {
166*7330f729Sjoerg WordText.push_back(C);
167*7330f729Sjoerg consumeChar();
168*7330f729Sjoerg } else
169*7330f729Sjoerg break;
170*7330f729Sjoerg }
171*7330f729Sjoerg const unsigned Length = WordText.size();
172*7330f729Sjoerg if (Length == 0) {
173*7330f729Sjoerg Pos = SavedPos;
174*7330f729Sjoerg return false;
175*7330f729Sjoerg }
176*7330f729Sjoerg
177*7330f729Sjoerg char *TextPtr = Allocator.Allocate<char>(Length + 1);
178*7330f729Sjoerg
179*7330f729Sjoerg memcpy(TextPtr, WordText.c_str(), Length + 1);
180*7330f729Sjoerg StringRef Text = StringRef(TextPtr, Length);
181*7330f729Sjoerg
182*7330f729Sjoerg formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
183*7330f729Sjoerg return true;
184*7330f729Sjoerg }
185*7330f729Sjoerg
lexDelimitedSeq(Token & Tok,char OpenDelim,char CloseDelim)186*7330f729Sjoerg bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
187*7330f729Sjoerg if (isEnd())
188*7330f729Sjoerg return false;
189*7330f729Sjoerg
190*7330f729Sjoerg Position SavedPos = Pos;
191*7330f729Sjoerg
192*7330f729Sjoerg consumeWhitespace();
193*7330f729Sjoerg SmallString<32> WordText;
194*7330f729Sjoerg const char *WordBegin = Pos.BufferPtr;
195*7330f729Sjoerg SourceLocation Loc = getSourceLocation();
196*7330f729Sjoerg bool Error = false;
197*7330f729Sjoerg if (!isEnd()) {
198*7330f729Sjoerg const char C = peek();
199*7330f729Sjoerg if (C == OpenDelim) {
200*7330f729Sjoerg WordText.push_back(C);
201*7330f729Sjoerg consumeChar();
202*7330f729Sjoerg } else
203*7330f729Sjoerg Error = true;
204*7330f729Sjoerg }
205*7330f729Sjoerg char C = '\0';
206*7330f729Sjoerg while (!Error && !isEnd()) {
207*7330f729Sjoerg C = peek();
208*7330f729Sjoerg WordText.push_back(C);
209*7330f729Sjoerg consumeChar();
210*7330f729Sjoerg if (C == CloseDelim)
211*7330f729Sjoerg break;
212*7330f729Sjoerg }
213*7330f729Sjoerg if (!Error && C != CloseDelim)
214*7330f729Sjoerg Error = true;
215*7330f729Sjoerg
216*7330f729Sjoerg if (Error) {
217*7330f729Sjoerg Pos = SavedPos;
218*7330f729Sjoerg return false;
219*7330f729Sjoerg }
220*7330f729Sjoerg
221*7330f729Sjoerg const unsigned Length = WordText.size();
222*7330f729Sjoerg char *TextPtr = Allocator.Allocate<char>(Length + 1);
223*7330f729Sjoerg
224*7330f729Sjoerg memcpy(TextPtr, WordText.c_str(), Length + 1);
225*7330f729Sjoerg StringRef Text = StringRef(TextPtr, Length);
226*7330f729Sjoerg
227*7330f729Sjoerg formTokenWithChars(Tok, Loc, WordBegin,
228*7330f729Sjoerg Pos.BufferPtr - WordBegin, Text);
229*7330f729Sjoerg return true;
230*7330f729Sjoerg }
231*7330f729Sjoerg
232*7330f729Sjoerg /// Put back tokens that we didn't consume.
putBackLeftoverTokens()233*7330f729Sjoerg void putBackLeftoverTokens() {
234*7330f729Sjoerg if (isEnd())
235*7330f729Sjoerg return;
236*7330f729Sjoerg
237*7330f729Sjoerg bool HavePartialTok = false;
238*7330f729Sjoerg Token PartialTok;
239*7330f729Sjoerg if (Pos.BufferPtr != Pos.BufferStart) {
240*7330f729Sjoerg formTokenWithChars(PartialTok, getSourceLocation(),
241*7330f729Sjoerg Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
242*7330f729Sjoerg StringRef(Pos.BufferPtr,
243*7330f729Sjoerg Pos.BufferEnd - Pos.BufferPtr));
244*7330f729Sjoerg HavePartialTok = true;
245*7330f729Sjoerg Pos.CurToken++;
246*7330f729Sjoerg }
247*7330f729Sjoerg
248*7330f729Sjoerg P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
249*7330f729Sjoerg Pos.CurToken = Toks.size();
250*7330f729Sjoerg
251*7330f729Sjoerg if (HavePartialTok)
252*7330f729Sjoerg P.putBack(PartialTok);
253*7330f729Sjoerg }
254*7330f729Sjoerg };
255*7330f729Sjoerg
Parser(Lexer & L,Sema & S,llvm::BumpPtrAllocator & Allocator,const SourceManager & SourceMgr,DiagnosticsEngine & Diags,const CommandTraits & Traits)256*7330f729Sjoerg Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
257*7330f729Sjoerg const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
258*7330f729Sjoerg const CommandTraits &Traits):
259*7330f729Sjoerg L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
260*7330f729Sjoerg Traits(Traits) {
261*7330f729Sjoerg consumeToken();
262*7330f729Sjoerg }
263*7330f729Sjoerg
parseParamCommandArgs(ParamCommandComment * PC,TextTokenRetokenizer & Retokenizer)264*7330f729Sjoerg void Parser::parseParamCommandArgs(ParamCommandComment *PC,
265*7330f729Sjoerg TextTokenRetokenizer &Retokenizer) {
266*7330f729Sjoerg Token Arg;
267*7330f729Sjoerg // Check if argument looks like direction specification: [dir]
268*7330f729Sjoerg // e.g., [in], [out], [in,out]
269*7330f729Sjoerg if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
270*7330f729Sjoerg S.actOnParamCommandDirectionArg(PC,
271*7330f729Sjoerg Arg.getLocation(),
272*7330f729Sjoerg Arg.getEndLocation(),
273*7330f729Sjoerg Arg.getText());
274*7330f729Sjoerg
275*7330f729Sjoerg if (Retokenizer.lexWord(Arg))
276*7330f729Sjoerg S.actOnParamCommandParamNameArg(PC,
277*7330f729Sjoerg Arg.getLocation(),
278*7330f729Sjoerg Arg.getEndLocation(),
279*7330f729Sjoerg Arg.getText());
280*7330f729Sjoerg }
281*7330f729Sjoerg
parseTParamCommandArgs(TParamCommandComment * TPC,TextTokenRetokenizer & Retokenizer)282*7330f729Sjoerg void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
283*7330f729Sjoerg TextTokenRetokenizer &Retokenizer) {
284*7330f729Sjoerg Token Arg;
285*7330f729Sjoerg if (Retokenizer.lexWord(Arg))
286*7330f729Sjoerg S.actOnTParamCommandParamNameArg(TPC,
287*7330f729Sjoerg Arg.getLocation(),
288*7330f729Sjoerg Arg.getEndLocation(),
289*7330f729Sjoerg Arg.getText());
290*7330f729Sjoerg }
291*7330f729Sjoerg
parseBlockCommandArgs(BlockCommandComment * BC,TextTokenRetokenizer & Retokenizer,unsigned NumArgs)292*7330f729Sjoerg void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
293*7330f729Sjoerg TextTokenRetokenizer &Retokenizer,
294*7330f729Sjoerg unsigned NumArgs) {
295*7330f729Sjoerg typedef BlockCommandComment::Argument Argument;
296*7330f729Sjoerg Argument *Args =
297*7330f729Sjoerg new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
298*7330f729Sjoerg unsigned ParsedArgs = 0;
299*7330f729Sjoerg Token Arg;
300*7330f729Sjoerg while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
301*7330f729Sjoerg Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
302*7330f729Sjoerg Arg.getEndLocation()),
303*7330f729Sjoerg Arg.getText());
304*7330f729Sjoerg ParsedArgs++;
305*7330f729Sjoerg }
306*7330f729Sjoerg
307*7330f729Sjoerg S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
308*7330f729Sjoerg }
309*7330f729Sjoerg
parseBlockCommand()310*7330f729Sjoerg BlockCommandComment *Parser::parseBlockCommand() {
311*7330f729Sjoerg assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
312*7330f729Sjoerg
313*7330f729Sjoerg ParamCommandComment *PC = nullptr;
314*7330f729Sjoerg TParamCommandComment *TPC = nullptr;
315*7330f729Sjoerg BlockCommandComment *BC = nullptr;
316*7330f729Sjoerg const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
317*7330f729Sjoerg CommandMarkerKind CommandMarker =
318*7330f729Sjoerg Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
319*7330f729Sjoerg if (Info->IsParamCommand) {
320*7330f729Sjoerg PC = S.actOnParamCommandStart(Tok.getLocation(),
321*7330f729Sjoerg Tok.getEndLocation(),
322*7330f729Sjoerg Tok.getCommandID(),
323*7330f729Sjoerg CommandMarker);
324*7330f729Sjoerg } else if (Info->IsTParamCommand) {
325*7330f729Sjoerg TPC = S.actOnTParamCommandStart(Tok.getLocation(),
326*7330f729Sjoerg Tok.getEndLocation(),
327*7330f729Sjoerg Tok.getCommandID(),
328*7330f729Sjoerg CommandMarker);
329*7330f729Sjoerg } else {
330*7330f729Sjoerg BC = S.actOnBlockCommandStart(Tok.getLocation(),
331*7330f729Sjoerg Tok.getEndLocation(),
332*7330f729Sjoerg Tok.getCommandID(),
333*7330f729Sjoerg CommandMarker);
334*7330f729Sjoerg }
335*7330f729Sjoerg consumeToken();
336*7330f729Sjoerg
337*7330f729Sjoerg if (isTokBlockCommand()) {
338*7330f729Sjoerg // Block command ahead. We can't nest block commands, so pretend that this
339*7330f729Sjoerg // command has an empty argument.
340*7330f729Sjoerg ParagraphComment *Paragraph = S.actOnParagraphComment(None);
341*7330f729Sjoerg if (PC) {
342*7330f729Sjoerg S.actOnParamCommandFinish(PC, Paragraph);
343*7330f729Sjoerg return PC;
344*7330f729Sjoerg } else if (TPC) {
345*7330f729Sjoerg S.actOnTParamCommandFinish(TPC, Paragraph);
346*7330f729Sjoerg return TPC;
347*7330f729Sjoerg } else {
348*7330f729Sjoerg S.actOnBlockCommandFinish(BC, Paragraph);
349*7330f729Sjoerg return BC;
350*7330f729Sjoerg }
351*7330f729Sjoerg }
352*7330f729Sjoerg
353*7330f729Sjoerg if (PC || TPC || Info->NumArgs > 0) {
354*7330f729Sjoerg // In order to parse command arguments we need to retokenize a few
355*7330f729Sjoerg // following text tokens.
356*7330f729Sjoerg TextTokenRetokenizer Retokenizer(Allocator, *this);
357*7330f729Sjoerg
358*7330f729Sjoerg if (PC)
359*7330f729Sjoerg parseParamCommandArgs(PC, Retokenizer);
360*7330f729Sjoerg else if (TPC)
361*7330f729Sjoerg parseTParamCommandArgs(TPC, Retokenizer);
362*7330f729Sjoerg else
363*7330f729Sjoerg parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
364*7330f729Sjoerg
365*7330f729Sjoerg Retokenizer.putBackLeftoverTokens();
366*7330f729Sjoerg }
367*7330f729Sjoerg
368*7330f729Sjoerg // If there's a block command ahead, we will attach an empty paragraph to
369*7330f729Sjoerg // this command.
370*7330f729Sjoerg bool EmptyParagraph = false;
371*7330f729Sjoerg if (isTokBlockCommand())
372*7330f729Sjoerg EmptyParagraph = true;
373*7330f729Sjoerg else if (Tok.is(tok::newline)) {
374*7330f729Sjoerg Token PrevTok = Tok;
375*7330f729Sjoerg consumeToken();
376*7330f729Sjoerg EmptyParagraph = isTokBlockCommand();
377*7330f729Sjoerg putBack(PrevTok);
378*7330f729Sjoerg }
379*7330f729Sjoerg
380*7330f729Sjoerg ParagraphComment *Paragraph;
381*7330f729Sjoerg if (EmptyParagraph)
382*7330f729Sjoerg Paragraph = S.actOnParagraphComment(None);
383*7330f729Sjoerg else {
384*7330f729Sjoerg BlockContentComment *Block = parseParagraphOrBlockCommand();
385*7330f729Sjoerg // Since we have checked for a block command, we should have parsed a
386*7330f729Sjoerg // paragraph.
387*7330f729Sjoerg Paragraph = cast<ParagraphComment>(Block);
388*7330f729Sjoerg }
389*7330f729Sjoerg
390*7330f729Sjoerg if (PC) {
391*7330f729Sjoerg S.actOnParamCommandFinish(PC, Paragraph);
392*7330f729Sjoerg return PC;
393*7330f729Sjoerg } else if (TPC) {
394*7330f729Sjoerg S.actOnTParamCommandFinish(TPC, Paragraph);
395*7330f729Sjoerg return TPC;
396*7330f729Sjoerg } else {
397*7330f729Sjoerg S.actOnBlockCommandFinish(BC, Paragraph);
398*7330f729Sjoerg return BC;
399*7330f729Sjoerg }
400*7330f729Sjoerg }
401*7330f729Sjoerg
parseInlineCommand()402*7330f729Sjoerg InlineCommandComment *Parser::parseInlineCommand() {
403*7330f729Sjoerg assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
404*7330f729Sjoerg
405*7330f729Sjoerg const Token CommandTok = Tok;
406*7330f729Sjoerg consumeToken();
407*7330f729Sjoerg
408*7330f729Sjoerg TextTokenRetokenizer Retokenizer(Allocator, *this);
409*7330f729Sjoerg
410*7330f729Sjoerg Token ArgTok;
411*7330f729Sjoerg bool ArgTokValid = Retokenizer.lexWord(ArgTok);
412*7330f729Sjoerg
413*7330f729Sjoerg InlineCommandComment *IC;
414*7330f729Sjoerg if (ArgTokValid) {
415*7330f729Sjoerg IC = S.actOnInlineCommand(CommandTok.getLocation(),
416*7330f729Sjoerg CommandTok.getEndLocation(),
417*7330f729Sjoerg CommandTok.getCommandID(),
418*7330f729Sjoerg ArgTok.getLocation(),
419*7330f729Sjoerg ArgTok.getEndLocation(),
420*7330f729Sjoerg ArgTok.getText());
421*7330f729Sjoerg } else {
422*7330f729Sjoerg IC = S.actOnInlineCommand(CommandTok.getLocation(),
423*7330f729Sjoerg CommandTok.getEndLocation(),
424*7330f729Sjoerg CommandTok.getCommandID());
425*7330f729Sjoerg
426*7330f729Sjoerg Diag(CommandTok.getEndLocation().getLocWithOffset(1),
427*7330f729Sjoerg diag::warn_doc_inline_contents_no_argument)
428*7330f729Sjoerg << CommandTok.is(tok::at_command)
429*7330f729Sjoerg << Traits.getCommandInfo(CommandTok.getCommandID())->Name
430*7330f729Sjoerg << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation());
431*7330f729Sjoerg }
432*7330f729Sjoerg
433*7330f729Sjoerg Retokenizer.putBackLeftoverTokens();
434*7330f729Sjoerg
435*7330f729Sjoerg return IC;
436*7330f729Sjoerg }
437*7330f729Sjoerg
parseHTMLStartTag()438*7330f729Sjoerg HTMLStartTagComment *Parser::parseHTMLStartTag() {
439*7330f729Sjoerg assert(Tok.is(tok::html_start_tag));
440*7330f729Sjoerg HTMLStartTagComment *HST =
441*7330f729Sjoerg S.actOnHTMLStartTagStart(Tok.getLocation(),
442*7330f729Sjoerg Tok.getHTMLTagStartName());
443*7330f729Sjoerg consumeToken();
444*7330f729Sjoerg
445*7330f729Sjoerg SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
446*7330f729Sjoerg while (true) {
447*7330f729Sjoerg switch (Tok.getKind()) {
448*7330f729Sjoerg case tok::html_ident: {
449*7330f729Sjoerg Token Ident = Tok;
450*7330f729Sjoerg consumeToken();
451*7330f729Sjoerg if (Tok.isNot(tok::html_equals)) {
452*7330f729Sjoerg Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
453*7330f729Sjoerg Ident.getHTMLIdent()));
454*7330f729Sjoerg continue;
455*7330f729Sjoerg }
456*7330f729Sjoerg Token Equals = Tok;
457*7330f729Sjoerg consumeToken();
458*7330f729Sjoerg if (Tok.isNot(tok::html_quoted_string)) {
459*7330f729Sjoerg Diag(Tok.getLocation(),
460*7330f729Sjoerg diag::warn_doc_html_start_tag_expected_quoted_string)
461*7330f729Sjoerg << SourceRange(Equals.getLocation());
462*7330f729Sjoerg Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
463*7330f729Sjoerg Ident.getHTMLIdent()));
464*7330f729Sjoerg while (Tok.is(tok::html_equals) ||
465*7330f729Sjoerg Tok.is(tok::html_quoted_string))
466*7330f729Sjoerg consumeToken();
467*7330f729Sjoerg continue;
468*7330f729Sjoerg }
469*7330f729Sjoerg Attrs.push_back(HTMLStartTagComment::Attribute(
470*7330f729Sjoerg Ident.getLocation(),
471*7330f729Sjoerg Ident.getHTMLIdent(),
472*7330f729Sjoerg Equals.getLocation(),
473*7330f729Sjoerg SourceRange(Tok.getLocation(),
474*7330f729Sjoerg Tok.getEndLocation()),
475*7330f729Sjoerg Tok.getHTMLQuotedString()));
476*7330f729Sjoerg consumeToken();
477*7330f729Sjoerg continue;
478*7330f729Sjoerg }
479*7330f729Sjoerg
480*7330f729Sjoerg case tok::html_greater:
481*7330f729Sjoerg S.actOnHTMLStartTagFinish(HST,
482*7330f729Sjoerg S.copyArray(llvm::makeArrayRef(Attrs)),
483*7330f729Sjoerg Tok.getLocation(),
484*7330f729Sjoerg /* IsSelfClosing = */ false);
485*7330f729Sjoerg consumeToken();
486*7330f729Sjoerg return HST;
487*7330f729Sjoerg
488*7330f729Sjoerg case tok::html_slash_greater:
489*7330f729Sjoerg S.actOnHTMLStartTagFinish(HST,
490*7330f729Sjoerg S.copyArray(llvm::makeArrayRef(Attrs)),
491*7330f729Sjoerg Tok.getLocation(),
492*7330f729Sjoerg /* IsSelfClosing = */ true);
493*7330f729Sjoerg consumeToken();
494*7330f729Sjoerg return HST;
495*7330f729Sjoerg
496*7330f729Sjoerg case tok::html_equals:
497*7330f729Sjoerg case tok::html_quoted_string:
498*7330f729Sjoerg Diag(Tok.getLocation(),
499*7330f729Sjoerg diag::warn_doc_html_start_tag_expected_ident_or_greater);
500*7330f729Sjoerg while (Tok.is(tok::html_equals) ||
501*7330f729Sjoerg Tok.is(tok::html_quoted_string))
502*7330f729Sjoerg consumeToken();
503*7330f729Sjoerg if (Tok.is(tok::html_ident) ||
504*7330f729Sjoerg Tok.is(tok::html_greater) ||
505*7330f729Sjoerg Tok.is(tok::html_slash_greater))
506*7330f729Sjoerg continue;
507*7330f729Sjoerg
508*7330f729Sjoerg S.actOnHTMLStartTagFinish(HST,
509*7330f729Sjoerg S.copyArray(llvm::makeArrayRef(Attrs)),
510*7330f729Sjoerg SourceLocation(),
511*7330f729Sjoerg /* IsSelfClosing = */ false);
512*7330f729Sjoerg return HST;
513*7330f729Sjoerg
514*7330f729Sjoerg default:
515*7330f729Sjoerg // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
516*7330f729Sjoerg S.actOnHTMLStartTagFinish(HST,
517*7330f729Sjoerg S.copyArray(llvm::makeArrayRef(Attrs)),
518*7330f729Sjoerg SourceLocation(),
519*7330f729Sjoerg /* IsSelfClosing = */ false);
520*7330f729Sjoerg bool StartLineInvalid;
521*7330f729Sjoerg const unsigned StartLine = SourceMgr.getPresumedLineNumber(
522*7330f729Sjoerg HST->getLocation(),
523*7330f729Sjoerg &StartLineInvalid);
524*7330f729Sjoerg bool EndLineInvalid;
525*7330f729Sjoerg const unsigned EndLine = SourceMgr.getPresumedLineNumber(
526*7330f729Sjoerg Tok.getLocation(),
527*7330f729Sjoerg &EndLineInvalid);
528*7330f729Sjoerg if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
529*7330f729Sjoerg Diag(Tok.getLocation(),
530*7330f729Sjoerg diag::warn_doc_html_start_tag_expected_ident_or_greater)
531*7330f729Sjoerg << HST->getSourceRange();
532*7330f729Sjoerg else {
533*7330f729Sjoerg Diag(Tok.getLocation(),
534*7330f729Sjoerg diag::warn_doc_html_start_tag_expected_ident_or_greater);
535*7330f729Sjoerg Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
536*7330f729Sjoerg << HST->getSourceRange();
537*7330f729Sjoerg }
538*7330f729Sjoerg return HST;
539*7330f729Sjoerg }
540*7330f729Sjoerg }
541*7330f729Sjoerg }
542*7330f729Sjoerg
parseHTMLEndTag()543*7330f729Sjoerg HTMLEndTagComment *Parser::parseHTMLEndTag() {
544*7330f729Sjoerg assert(Tok.is(tok::html_end_tag));
545*7330f729Sjoerg Token TokEndTag = Tok;
546*7330f729Sjoerg consumeToken();
547*7330f729Sjoerg SourceLocation Loc;
548*7330f729Sjoerg if (Tok.is(tok::html_greater)) {
549*7330f729Sjoerg Loc = Tok.getLocation();
550*7330f729Sjoerg consumeToken();
551*7330f729Sjoerg }
552*7330f729Sjoerg
553*7330f729Sjoerg return S.actOnHTMLEndTag(TokEndTag.getLocation(),
554*7330f729Sjoerg Loc,
555*7330f729Sjoerg TokEndTag.getHTMLTagEndName());
556*7330f729Sjoerg }
557*7330f729Sjoerg
parseParagraphOrBlockCommand()558*7330f729Sjoerg BlockContentComment *Parser::parseParagraphOrBlockCommand() {
559*7330f729Sjoerg SmallVector<InlineContentComment *, 8> Content;
560*7330f729Sjoerg
561*7330f729Sjoerg while (true) {
562*7330f729Sjoerg switch (Tok.getKind()) {
563*7330f729Sjoerg case tok::verbatim_block_begin:
564*7330f729Sjoerg case tok::verbatim_line_name:
565*7330f729Sjoerg case tok::eof:
566*7330f729Sjoerg break; // Block content or EOF ahead, finish this parapgaph.
567*7330f729Sjoerg
568*7330f729Sjoerg case tok::unknown_command:
569*7330f729Sjoerg Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
570*7330f729Sjoerg Tok.getEndLocation(),
571*7330f729Sjoerg Tok.getUnknownCommandName()));
572*7330f729Sjoerg consumeToken();
573*7330f729Sjoerg continue;
574*7330f729Sjoerg
575*7330f729Sjoerg case tok::backslash_command:
576*7330f729Sjoerg case tok::at_command: {
577*7330f729Sjoerg const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
578*7330f729Sjoerg if (Info->IsBlockCommand) {
579*7330f729Sjoerg if (Content.size() == 0)
580*7330f729Sjoerg return parseBlockCommand();
581*7330f729Sjoerg break; // Block command ahead, finish this parapgaph.
582*7330f729Sjoerg }
583*7330f729Sjoerg if (Info->IsVerbatimBlockEndCommand) {
584*7330f729Sjoerg Diag(Tok.getLocation(),
585*7330f729Sjoerg diag::warn_verbatim_block_end_without_start)
586*7330f729Sjoerg << Tok.is(tok::at_command)
587*7330f729Sjoerg << Info->Name
588*7330f729Sjoerg << SourceRange(Tok.getLocation(), Tok.getEndLocation());
589*7330f729Sjoerg consumeToken();
590*7330f729Sjoerg continue;
591*7330f729Sjoerg }
592*7330f729Sjoerg if (Info->IsUnknownCommand) {
593*7330f729Sjoerg Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
594*7330f729Sjoerg Tok.getEndLocation(),
595*7330f729Sjoerg Info->getID()));
596*7330f729Sjoerg consumeToken();
597*7330f729Sjoerg continue;
598*7330f729Sjoerg }
599*7330f729Sjoerg assert(Info->IsInlineCommand);
600*7330f729Sjoerg Content.push_back(parseInlineCommand());
601*7330f729Sjoerg continue;
602*7330f729Sjoerg }
603*7330f729Sjoerg
604*7330f729Sjoerg case tok::newline: {
605*7330f729Sjoerg consumeToken();
606*7330f729Sjoerg if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
607*7330f729Sjoerg consumeToken();
608*7330f729Sjoerg break; // Two newlines -- end of paragraph.
609*7330f729Sjoerg }
610*7330f729Sjoerg // Also allow [tok::newline, tok::text, tok::newline] if the middle
611*7330f729Sjoerg // tok::text is just whitespace.
612*7330f729Sjoerg if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
613*7330f729Sjoerg Token WhitespaceTok = Tok;
614*7330f729Sjoerg consumeToken();
615*7330f729Sjoerg if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
616*7330f729Sjoerg consumeToken();
617*7330f729Sjoerg break;
618*7330f729Sjoerg }
619*7330f729Sjoerg // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
620*7330f729Sjoerg putBack(WhitespaceTok);
621*7330f729Sjoerg }
622*7330f729Sjoerg if (Content.size() > 0)
623*7330f729Sjoerg Content.back()->addTrailingNewline();
624*7330f729Sjoerg continue;
625*7330f729Sjoerg }
626*7330f729Sjoerg
627*7330f729Sjoerg // Don't deal with HTML tag soup now.
628*7330f729Sjoerg case tok::html_start_tag:
629*7330f729Sjoerg Content.push_back(parseHTMLStartTag());
630*7330f729Sjoerg continue;
631*7330f729Sjoerg
632*7330f729Sjoerg case tok::html_end_tag:
633*7330f729Sjoerg Content.push_back(parseHTMLEndTag());
634*7330f729Sjoerg continue;
635*7330f729Sjoerg
636*7330f729Sjoerg case tok::text:
637*7330f729Sjoerg Content.push_back(S.actOnText(Tok.getLocation(),
638*7330f729Sjoerg Tok.getEndLocation(),
639*7330f729Sjoerg Tok.getText()));
640*7330f729Sjoerg consumeToken();
641*7330f729Sjoerg continue;
642*7330f729Sjoerg
643*7330f729Sjoerg case tok::verbatim_block_line:
644*7330f729Sjoerg case tok::verbatim_block_end:
645*7330f729Sjoerg case tok::verbatim_line_text:
646*7330f729Sjoerg case tok::html_ident:
647*7330f729Sjoerg case tok::html_equals:
648*7330f729Sjoerg case tok::html_quoted_string:
649*7330f729Sjoerg case tok::html_greater:
650*7330f729Sjoerg case tok::html_slash_greater:
651*7330f729Sjoerg llvm_unreachable("should not see this token");
652*7330f729Sjoerg }
653*7330f729Sjoerg break;
654*7330f729Sjoerg }
655*7330f729Sjoerg
656*7330f729Sjoerg return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
657*7330f729Sjoerg }
658*7330f729Sjoerg
parseVerbatimBlock()659*7330f729Sjoerg VerbatimBlockComment *Parser::parseVerbatimBlock() {
660*7330f729Sjoerg assert(Tok.is(tok::verbatim_block_begin));
661*7330f729Sjoerg
662*7330f729Sjoerg VerbatimBlockComment *VB =
663*7330f729Sjoerg S.actOnVerbatimBlockStart(Tok.getLocation(),
664*7330f729Sjoerg Tok.getVerbatimBlockID());
665*7330f729Sjoerg consumeToken();
666*7330f729Sjoerg
667*7330f729Sjoerg // Don't create an empty line if verbatim opening command is followed
668*7330f729Sjoerg // by a newline.
669*7330f729Sjoerg if (Tok.is(tok::newline))
670*7330f729Sjoerg consumeToken();
671*7330f729Sjoerg
672*7330f729Sjoerg SmallVector<VerbatimBlockLineComment *, 8> Lines;
673*7330f729Sjoerg while (Tok.is(tok::verbatim_block_line) ||
674*7330f729Sjoerg Tok.is(tok::newline)) {
675*7330f729Sjoerg VerbatimBlockLineComment *Line;
676*7330f729Sjoerg if (Tok.is(tok::verbatim_block_line)) {
677*7330f729Sjoerg Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
678*7330f729Sjoerg Tok.getVerbatimBlockText());
679*7330f729Sjoerg consumeToken();
680*7330f729Sjoerg if (Tok.is(tok::newline)) {
681*7330f729Sjoerg consumeToken();
682*7330f729Sjoerg }
683*7330f729Sjoerg } else {
684*7330f729Sjoerg // Empty line, just a tok::newline.
685*7330f729Sjoerg Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
686*7330f729Sjoerg consumeToken();
687*7330f729Sjoerg }
688*7330f729Sjoerg Lines.push_back(Line);
689*7330f729Sjoerg }
690*7330f729Sjoerg
691*7330f729Sjoerg if (Tok.is(tok::verbatim_block_end)) {
692*7330f729Sjoerg const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
693*7330f729Sjoerg S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
694*7330f729Sjoerg Info->Name,
695*7330f729Sjoerg S.copyArray(llvm::makeArrayRef(Lines)));
696*7330f729Sjoerg consumeToken();
697*7330f729Sjoerg } else {
698*7330f729Sjoerg // Unterminated \\verbatim block
699*7330f729Sjoerg S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
700*7330f729Sjoerg S.copyArray(llvm::makeArrayRef(Lines)));
701*7330f729Sjoerg }
702*7330f729Sjoerg
703*7330f729Sjoerg return VB;
704*7330f729Sjoerg }
705*7330f729Sjoerg
parseVerbatimLine()706*7330f729Sjoerg VerbatimLineComment *Parser::parseVerbatimLine() {
707*7330f729Sjoerg assert(Tok.is(tok::verbatim_line_name));
708*7330f729Sjoerg
709*7330f729Sjoerg Token NameTok = Tok;
710*7330f729Sjoerg consumeToken();
711*7330f729Sjoerg
712*7330f729Sjoerg SourceLocation TextBegin;
713*7330f729Sjoerg StringRef Text;
714*7330f729Sjoerg // Next token might not be a tok::verbatim_line_text if verbatim line
715*7330f729Sjoerg // starting command comes just before a newline or comment end.
716*7330f729Sjoerg if (Tok.is(tok::verbatim_line_text)) {
717*7330f729Sjoerg TextBegin = Tok.getLocation();
718*7330f729Sjoerg Text = Tok.getVerbatimLineText();
719*7330f729Sjoerg } else {
720*7330f729Sjoerg TextBegin = NameTok.getEndLocation();
721*7330f729Sjoerg Text = "";
722*7330f729Sjoerg }
723*7330f729Sjoerg
724*7330f729Sjoerg VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
725*7330f729Sjoerg NameTok.getVerbatimLineID(),
726*7330f729Sjoerg TextBegin,
727*7330f729Sjoerg Text);
728*7330f729Sjoerg consumeToken();
729*7330f729Sjoerg return VL;
730*7330f729Sjoerg }
731*7330f729Sjoerg
parseBlockContent()732*7330f729Sjoerg BlockContentComment *Parser::parseBlockContent() {
733*7330f729Sjoerg switch (Tok.getKind()) {
734*7330f729Sjoerg case tok::text:
735*7330f729Sjoerg case tok::unknown_command:
736*7330f729Sjoerg case tok::backslash_command:
737*7330f729Sjoerg case tok::at_command:
738*7330f729Sjoerg case tok::html_start_tag:
739*7330f729Sjoerg case tok::html_end_tag:
740*7330f729Sjoerg return parseParagraphOrBlockCommand();
741*7330f729Sjoerg
742*7330f729Sjoerg case tok::verbatim_block_begin:
743*7330f729Sjoerg return parseVerbatimBlock();
744*7330f729Sjoerg
745*7330f729Sjoerg case tok::verbatim_line_name:
746*7330f729Sjoerg return parseVerbatimLine();
747*7330f729Sjoerg
748*7330f729Sjoerg case tok::eof:
749*7330f729Sjoerg case tok::newline:
750*7330f729Sjoerg case tok::verbatim_block_line:
751*7330f729Sjoerg case tok::verbatim_block_end:
752*7330f729Sjoerg case tok::verbatim_line_text:
753*7330f729Sjoerg case tok::html_ident:
754*7330f729Sjoerg case tok::html_equals:
755*7330f729Sjoerg case tok::html_quoted_string:
756*7330f729Sjoerg case tok::html_greater:
757*7330f729Sjoerg case tok::html_slash_greater:
758*7330f729Sjoerg llvm_unreachable("should not see this token");
759*7330f729Sjoerg }
760*7330f729Sjoerg llvm_unreachable("bogus token kind");
761*7330f729Sjoerg }
762*7330f729Sjoerg
parseFullComment()763*7330f729Sjoerg FullComment *Parser::parseFullComment() {
764*7330f729Sjoerg // Skip newlines at the beginning of the comment.
765*7330f729Sjoerg while (Tok.is(tok::newline))
766*7330f729Sjoerg consumeToken();
767*7330f729Sjoerg
768*7330f729Sjoerg SmallVector<BlockContentComment *, 8> Blocks;
769*7330f729Sjoerg while (Tok.isNot(tok::eof)) {
770*7330f729Sjoerg Blocks.push_back(parseBlockContent());
771*7330f729Sjoerg
772*7330f729Sjoerg // Skip extra newlines after paragraph end.
773*7330f729Sjoerg while (Tok.is(tok::newline))
774*7330f729Sjoerg consumeToken();
775*7330f729Sjoerg }
776*7330f729Sjoerg return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
777*7330f729Sjoerg }
778*7330f729Sjoerg
779*7330f729Sjoerg } // end namespace comments
780*7330f729Sjoerg } // end namespace clang
781