xref: /llvm-project/clang/lib/AST/CommentParser.cpp (revision bacb9f65a7bf7ea60c0485cbb48a3098eef55563)
1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "clang/AST/CommentParser.h"
11 #include "clang/AST/CommentSema.h"
12 #include "llvm/Support/ErrorHandling.h"
13 
14 namespace clang {
15 namespace comments {
16 
17 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator):
18     L(L), S(S), Allocator(Allocator) {
19   consumeToken();
20 }
21 
22 ParamCommandComment *Parser::parseParamCommandArgs(
23     ParamCommandComment *PC,
24     TextTokenRetokenizer &Retokenizer) {
25   Token Arg;
26   // Check if argument looks like direction specification: [dir]
27   // e.g., [in], [out], [in,out]
28   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
29     PC = S.actOnParamCommandArg(PC,
30                                 Arg.getLocation(),
31                                 Arg.getEndLocation(),
32                                 Arg.getText(),
33                                 /* IsDirection = */ true);
34 
35   if (Retokenizer.lexWord(Arg))
36     PC = S.actOnParamCommandArg(PC,
37                                 Arg.getLocation(),
38                                 Arg.getEndLocation(),
39                                 Arg.getText(),
40                                 /* IsDirection = */ false);
41 
42   return PC;
43 }
44 
45 BlockCommandComment *Parser::parseBlockCommandArgs(
46     BlockCommandComment *BC,
47     TextTokenRetokenizer &Retokenizer,
48     unsigned NumArgs) {
49   typedef BlockCommandComment::Argument Argument;
50   Argument *Args =
51       new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
52   unsigned ParsedArgs = 0;
53   Token Arg;
54   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
55     Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
56                                             Arg.getEndLocation()),
57                                 Arg.getText());
58     ParsedArgs++;
59   }
60 
61   return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
62 }
63 
64 BlockCommandComment *Parser::parseBlockCommand() {
65   assert(Tok.is(tok::command));
66 
67   ParamCommandComment *PC;
68   BlockCommandComment *BC;
69   bool IsParam = false;
70   unsigned NumArgs = 0;
71   if (S.isParamCommand(Tok.getCommandName())) {
72     IsParam = true;
73     PC = S.actOnParamCommandStart(Tok.getLocation(),
74                                   Tok.getEndLocation(),
75                                   Tok.getCommandName());
76   } else {
77     NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
78     BC = S.actOnBlockCommandStart(Tok.getLocation(),
79                                   Tok.getEndLocation(),
80                                   Tok.getCommandName());
81   }
82   consumeToken();
83 
84   if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
85     // Block command ahead.  We can't nest block commands, so pretend that this
86     // command has an empty argument.
87     // TODO: Diag() Warn empty arg to block command
88     ParagraphComment *PC = S.actOnParagraphComment(
89                                 ArrayRef<InlineContentComment *>());
90     return S.actOnBlockCommandFinish(BC, PC);
91   }
92 
93   if (IsParam || NumArgs > 0) {
94     // In order to parse command arguments we need to retokenize a few
95     // following text tokens.
96     TextTokenRetokenizer Retokenizer(Allocator);
97     while (Tok.is(tok::text)) {
98       if (Retokenizer.addToken(Tok))
99         consumeToken();
100     }
101 
102     if (IsParam)
103       PC = parseParamCommandArgs(PC, Retokenizer);
104     else
105       BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
106 
107     // Put back tokens we didn't use.
108     Token Text;
109     while (Retokenizer.lexText(Text))
110       putBack(Text);
111   }
112 
113   BlockContentComment *Block = parseParagraphOrBlockCommand();
114   // Since we have checked for a block command, we should have parsed a
115   // paragraph.
116   if (IsParam)
117     return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
118   else
119     return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
120 }
121 
122 InlineCommandComment *Parser::parseInlineCommand() {
123   assert(Tok.is(tok::command));
124 
125   const Token CommandTok = Tok;
126   consumeToken();
127 
128   TextTokenRetokenizer Retokenizer(Allocator);
129   while (Tok.is(tok::text)) {
130     if (Retokenizer.addToken(Tok))
131       consumeToken();
132   }
133 
134   Token ArgTok;
135   bool ArgTokValid = Retokenizer.lexWord(ArgTok);
136 
137   InlineCommandComment *IC;
138   if (ArgTokValid) {
139     IC = S.actOnInlineCommand(CommandTok.getLocation(),
140                               CommandTok.getEndLocation(),
141                               CommandTok.getCommandName(),
142                               ArgTok.getLocation(),
143                               ArgTok.getEndLocation(),
144                               ArgTok.getText());
145   } else {
146     IC = S.actOnInlineCommand(CommandTok.getLocation(),
147                               CommandTok.getEndLocation(),
148                               CommandTok.getCommandName());
149   }
150 
151   Token Text;
152   while (Retokenizer.lexText(Text))
153     putBack(Text);
154 
155   return IC;
156 }
157 
158 HTMLOpenTagComment *Parser::parseHTMLOpenTag() {
159   assert(Tok.is(tok::html_tag_open));
160   HTMLOpenTagComment *HOT =
161       S.actOnHTMLOpenTagStart(Tok.getLocation(),
162                               Tok.getHTMLTagOpenName());
163   consumeToken();
164 
165   SmallVector<HTMLOpenTagComment::Attribute, 2> Attrs;
166   while (true) {
167     if (Tok.is(tok::html_ident)) {
168       Token Ident = Tok;
169       consumeToken();
170       if (Tok.isNot(tok::html_equals)) {
171         Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
172                                                       Ident.getHTMLIdent()));
173         continue;
174       }
175       Token Equals = Tok;
176       consumeToken();
177       if (Tok.isNot(tok::html_quoted_string)) {
178         // TODO: Diag() expected quoted string
179         Attrs.push_back(HTMLOpenTagComment::Attribute(Ident.getLocation(),
180                                                       Ident.getHTMLIdent()));
181         continue;
182       }
183       Attrs.push_back(HTMLOpenTagComment::Attribute(
184                               Ident.getLocation(),
185                               Ident.getHTMLIdent(),
186                               Equals.getLocation(),
187                               SourceRange(Tok.getLocation(),
188                                           Tok.getEndLocation()),
189                               Tok.getHTMLQuotedString()));
190       consumeToken();
191       continue;
192     } else if (Tok.is(tok::html_greater)) {
193       HOT = S.actOnHTMLOpenTagFinish(HOT,
194                                      copyArray(llvm::makeArrayRef(Attrs)),
195                                      Tok.getLocation());
196       consumeToken();
197       return HOT;
198     } else if (Tok.is(tok::html_equals) ||
199                Tok.is(tok::html_quoted_string)) {
200       // TODO: Diag() Err expected ident
201       while (Tok.is(tok::html_equals) ||
202              Tok.is(tok::html_quoted_string))
203         consumeToken();
204     } else {
205       // Not a token from HTML open tag.  Thus HTML tag prematurely ended.
206       // TODO: Diag() Err HTML tag prematurely ended
207       return S.actOnHTMLOpenTagFinish(HOT,
208                                       copyArray(llvm::makeArrayRef(Attrs)),
209                                       SourceLocation());
210     }
211   }
212 }
213 
214 HTMLCloseTagComment *Parser::parseHTMLCloseTag() {
215   assert(Tok.is(tok::html_tag_close));
216   Token TokTagOpen = Tok;
217   consumeToken();
218   SourceLocation Loc;
219   if (Tok.is(tok::html_greater)) {
220     Loc = Tok.getLocation();
221     consumeToken();
222   }
223 
224   return S.actOnHTMLCloseTag(TokTagOpen.getLocation(),
225                              Loc,
226                              TokTagOpen.getHTMLTagCloseName());
227 }
228 
229 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
230   SmallVector<InlineContentComment *, 8> Content;
231 
232   while (true) {
233     switch (Tok.getKind()) {
234     case tok::verbatim_block_begin:
235     case tok::verbatim_line_name:
236     case tok::eof:
237       assert(Content.size() != 0);
238       break; // Block content or EOF ahead, finish this parapgaph.
239 
240     case tok::command:
241       if (S.isBlockCommand(Tok.getCommandName())) {
242         if (Content.size() == 0)
243           return parseBlockCommand();
244         break; // Block command ahead, finish this parapgaph.
245       }
246       if (S.isInlineCommand(Tok.getCommandName())) {
247         Content.push_back(parseInlineCommand());
248         continue;
249       }
250 
251       // Not a block command, not an inline command ==> an unknown command.
252       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
253                                               Tok.getEndLocation(),
254                                               Tok.getCommandName()));
255       consumeToken();
256       continue;
257 
258     case tok::newline: {
259       consumeToken();
260       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
261         consumeToken();
262         break; // Two newlines -- end of paragraph.
263       }
264       if (Content.size() > 0)
265         Content.back()->addTrailingNewline();
266       continue;
267     }
268 
269     // Don't deal with HTML tag soup now.
270     case tok::html_tag_open:
271       Content.push_back(parseHTMLOpenTag());
272       continue;
273 
274     case tok::html_tag_close:
275       Content.push_back(parseHTMLCloseTag());
276       continue;
277 
278     case tok::text:
279       Content.push_back(S.actOnText(Tok.getLocation(),
280                                     Tok.getEndLocation(),
281                                     Tok.getText()));
282       consumeToken();
283       continue;
284 
285     case tok::verbatim_block_line:
286     case tok::verbatim_block_end:
287     case tok::verbatim_line_text:
288     case tok::html_ident:
289     case tok::html_equals:
290     case tok::html_quoted_string:
291     case tok::html_greater:
292       llvm_unreachable("should not see this token");
293     }
294     break;
295   }
296 
297   return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
298 }
299 
300 VerbatimBlockComment *Parser::parseVerbatimBlock() {
301   assert(Tok.is(tok::verbatim_block_begin));
302 
303   VerbatimBlockComment *VB =
304       S.actOnVerbatimBlockStart(Tok.getLocation(),
305                                 Tok.getVerbatimBlockName());
306   consumeToken();
307 
308   // Don't create an empty line if verbatim opening command is followed
309   // by a newline.
310   if (Tok.is(tok::newline))
311     consumeToken();
312 
313   SmallVector<VerbatimBlockLineComment *, 8> Lines;
314   while (Tok.is(tok::verbatim_block_line) ||
315          Tok.is(tok::newline)) {
316     VerbatimBlockLineComment *Line;
317     if (Tok.is(tok::verbatim_block_line)) {
318       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
319                                       Tok.getVerbatimBlockText());
320       consumeToken();
321       if (Tok.is(tok::newline)) {
322         consumeToken();
323       }
324     } else {
325       // Empty line, just a tok::newline.
326       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
327                                       "");
328       consumeToken();
329     }
330     Lines.push_back(Line);
331   }
332 
333   assert(Tok.is(tok::verbatim_block_end));
334   VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
335                                   Tok.getVerbatimBlockName(),
336                                   copyArray(llvm::makeArrayRef(Lines)));
337   consumeToken();
338 
339   return VB;
340 }
341 
342 VerbatimLineComment *Parser::parseVerbatimLine() {
343   assert(Tok.is(tok::verbatim_line_name));
344 
345   Token NameTok = Tok;
346   consumeToken();
347 
348   SourceLocation TextBegin;
349   StringRef Text;
350   // Next token might not be a tok::verbatim_line_text if verbatim line
351   // starting command comes just before a newline or comment end.
352   if (Tok.is(tok::verbatim_line_text)) {
353     TextBegin = Tok.getLocation();
354     Text = Tok.getVerbatimLineText();
355   } else {
356     TextBegin = NameTok.getEndLocation();
357     Text = "";
358   }
359 
360   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
361                                                 NameTok.getVerbatimLineName(),
362                                                 TextBegin,
363                                                 Text);
364   consumeToken();
365   return VL;
366 }
367 
368 BlockContentComment *Parser::parseBlockContent() {
369   switch (Tok.getKind()) {
370   case tok::text:
371   case tok::command:
372   case tok::html_tag_open:
373   case tok::html_tag_close:
374     return parseParagraphOrBlockCommand();
375 
376   case tok::verbatim_block_begin:
377     return parseVerbatimBlock();
378 
379   case tok::verbatim_line_name:
380     return parseVerbatimLine();
381 
382   case tok::eof:
383   case tok::newline:
384   case tok::verbatim_block_line:
385   case tok::verbatim_block_end:
386   case tok::verbatim_line_text:
387   case tok::html_ident:
388   case tok::html_equals:
389   case tok::html_quoted_string:
390   case tok::html_greater:
391     llvm_unreachable("should not see this token");
392   }
393 }
394 
395 FullComment *Parser::parseFullComment() {
396   // Skip newlines at the beginning of the comment.
397   while (Tok.is(tok::newline))
398     consumeToken();
399 
400   SmallVector<BlockContentComment *, 8> Blocks;
401   while (Tok.isNot(tok::eof)) {
402     Blocks.push_back(parseBlockContent());
403 
404     // Skip extra newlines after paragraph end.
405     while (Tok.is(tok::newline))
406       consumeToken();
407   }
408   return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
409 }
410 
411 } // end namespace comments
412 } // end namespace clang
413 
414 
415