xref: /llvm-project/clang/lib/AST/CommentParser.cpp (revision 1c85d5b17d0bd4b688e6fd4a59ad9c6dc4a817a6)
1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "clang/AST/CommentParser.h"
11 #include "clang/AST/CommentSema.h"
12 #include "clang/AST/CommentDiagnostic.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "llvm/Support/ErrorHandling.h"
15 
16 namespace clang {
17 namespace comments {
18 
19 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
20                const SourceManager &SourceMgr, DiagnosticsEngine &Diags):
21     L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) {
22   consumeToken();
23 }
24 
25 ParamCommandComment *Parser::parseParamCommandArgs(
26     ParamCommandComment *PC,
27     TextTokenRetokenizer &Retokenizer) {
28   Token Arg;
29   // Check if argument looks like direction specification: [dir]
30   // e.g., [in], [out], [in,out]
31   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
32     PC = S.actOnParamCommandDirectionArg(PC,
33                                          Arg.getLocation(),
34                                          Arg.getEndLocation(),
35                                          Arg.getText());
36 
37   if (Retokenizer.lexWord(Arg))
38     PC = S.actOnParamCommandParamNameArg(PC,
39                                          Arg.getLocation(),
40                                          Arg.getEndLocation(),
41                                          Arg.getText());
42 
43   return PC;
44 }
45 
46 BlockCommandComment *Parser::parseBlockCommandArgs(
47     BlockCommandComment *BC,
48     TextTokenRetokenizer &Retokenizer,
49     unsigned NumArgs) {
50   typedef BlockCommandComment::Argument Argument;
51   Argument *Args =
52       new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
53   unsigned ParsedArgs = 0;
54   Token Arg;
55   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
56     Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
57                                             Arg.getEndLocation()),
58                                 Arg.getText());
59     ParsedArgs++;
60   }
61 
62   return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
63 }
64 
65 BlockCommandComment *Parser::parseBlockCommand() {
66   assert(Tok.is(tok::command));
67 
68   ParamCommandComment *PC;
69   BlockCommandComment *BC;
70   bool IsParam = false;
71   unsigned NumArgs = 0;
72   if (S.isParamCommand(Tok.getCommandName())) {
73     IsParam = true;
74     PC = S.actOnParamCommandStart(Tok.getLocation(),
75                                   Tok.getEndLocation(),
76                                   Tok.getCommandName());
77   } else {
78     NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
79     BC = S.actOnBlockCommandStart(Tok.getLocation(),
80                                   Tok.getEndLocation(),
81                                   Tok.getCommandName());
82   }
83   consumeToken();
84 
85   if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
86     // Block command ahead.  We can't nest block commands, so pretend that this
87     // command has an empty argument.
88     ParagraphComment *PC = S.actOnParagraphComment(
89                                 ArrayRef<InlineContentComment *>());
90     return S.actOnBlockCommandFinish(BC, PC);
91   }
92 
93   if (IsParam || NumArgs > 0) {
94     // In order to parse command arguments we need to retokenize a few
95     // following text tokens.
96     TextTokenRetokenizer Retokenizer(Allocator);
97     while (Tok.is(tok::text)) {
98       if (Retokenizer.addToken(Tok))
99         consumeToken();
100     }
101 
102     if (IsParam)
103       PC = parseParamCommandArgs(PC, Retokenizer);
104     else
105       BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
106 
107     // Put back tokens we didn't use.
108     SmallVector<Token, 16> TextToks;
109     Token Text;
110     while (Retokenizer.lexText(Text)) {
111       TextToks.push_back(Text);
112     }
113     putBack(TextToks);
114   }
115 
116   BlockContentComment *Block = parseParagraphOrBlockCommand();
117   // Since we have checked for a block command, we should have parsed a
118   // paragraph.
119   if (IsParam)
120     return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
121   else
122     return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
123 }
124 
125 InlineCommandComment *Parser::parseInlineCommand() {
126   assert(Tok.is(tok::command));
127 
128   const Token CommandTok = Tok;
129   consumeToken();
130 
131   TextTokenRetokenizer Retokenizer(Allocator);
132   while (Tok.is(tok::text)) {
133     if (Retokenizer.addToken(Tok))
134       consumeToken();
135   }
136 
137   Token ArgTok;
138   bool ArgTokValid = Retokenizer.lexWord(ArgTok);
139 
140   InlineCommandComment *IC;
141   if (ArgTokValid) {
142     IC = S.actOnInlineCommand(CommandTok.getLocation(),
143                               CommandTok.getEndLocation(),
144                               CommandTok.getCommandName(),
145                               ArgTok.getLocation(),
146                               ArgTok.getEndLocation(),
147                               ArgTok.getText());
148   } else {
149     IC = S.actOnInlineCommand(CommandTok.getLocation(),
150                               CommandTok.getEndLocation(),
151                               CommandTok.getCommandName());
152   }
153 
154   Token Text;
155   while (Retokenizer.lexText(Text))
156     putBack(Text);
157 
158   return IC;
159 }
160 
161 HTMLStartTagComment *Parser::parseHTMLStartTag() {
162   assert(Tok.is(tok::html_start_tag));
163   HTMLStartTagComment *HST =
164       S.actOnHTMLStartTagStart(Tok.getLocation(),
165                                Tok.getHTMLTagStartName());
166   consumeToken();
167 
168   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
169   while (true) {
170     switch (Tok.getKind()) {
171     case tok::html_ident: {
172       Token Ident = Tok;
173       consumeToken();
174       if (Tok.isNot(tok::html_equals)) {
175         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
176                                                        Ident.getHTMLIdent()));
177         continue;
178       }
179       Token Equals = Tok;
180       consumeToken();
181       if (Tok.isNot(tok::html_quoted_string)) {
182         Diag(Tok.getLocation(),
183              diag::warn_doc_html_start_tag_expected_quoted_string)
184           << SourceRange(Equals.getLocation());
185         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
186                                                        Ident.getHTMLIdent()));
187         while (Tok.is(tok::html_equals) ||
188                Tok.is(tok::html_quoted_string))
189           consumeToken();
190         continue;
191       }
192       Attrs.push_back(HTMLStartTagComment::Attribute(
193                               Ident.getLocation(),
194                               Ident.getHTMLIdent(),
195                               Equals.getLocation(),
196                               SourceRange(Tok.getLocation(),
197                                           Tok.getEndLocation()),
198                               Tok.getHTMLQuotedString()));
199       consumeToken();
200       continue;
201     }
202 
203     case tok::html_greater:
204       HST = S.actOnHTMLStartTagFinish(HST,
205                                       copyArray(llvm::makeArrayRef(Attrs)),
206                                       Tok.getLocation(),
207                                       /* IsSelfClosing = */ false);
208       consumeToken();
209       return HST;
210 
211     case tok::html_slash_greater:
212       HST = S.actOnHTMLStartTagFinish(HST,
213                                       copyArray(llvm::makeArrayRef(Attrs)),
214                                       Tok.getLocation(),
215                                       /* IsSelfClosing = */ true);
216       consumeToken();
217       return HST;
218 
219     case tok::html_equals:
220     case tok::html_quoted_string:
221       Diag(Tok.getLocation(),
222            diag::warn_doc_html_start_tag_expected_ident_or_greater);
223       while (Tok.is(tok::html_equals) ||
224              Tok.is(tok::html_quoted_string))
225         consumeToken();
226       if (Tok.is(tok::html_ident) ||
227           Tok.is(tok::html_greater) ||
228           Tok.is(tok::html_slash_greater))
229         continue;
230 
231       return S.actOnHTMLStartTagFinish(HST,
232                                        copyArray(llvm::makeArrayRef(Attrs)),
233                                        SourceLocation(),
234                                        /* IsSelfClosing = */ false);
235 
236     default:
237       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
238       HST = S.actOnHTMLStartTagFinish(HST,
239                                       copyArray(llvm::makeArrayRef(Attrs)),
240                                       SourceLocation(),
241                                       /* IsSelfClosing = */ false);
242       bool StartLineInvalid;
243       const unsigned StartLine = SourceMgr.getPresumedLineNumber(
244                                                   HST->getLocation(),
245                                                   &StartLineInvalid);
246       bool EndLineInvalid;
247       const unsigned EndLine = SourceMgr.getPresumedLineNumber(
248                                                   Tok.getLocation(),
249                                                   &EndLineInvalid);
250       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
251         Diag(Tok.getLocation(),
252              diag::warn_doc_html_start_tag_expected_ident_or_greater)
253           << HST->getSourceRange();
254       else {
255         Diag(Tok.getLocation(),
256              diag::warn_doc_html_start_tag_expected_ident_or_greater);
257         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
258           << HST->getSourceRange();
259       }
260       return HST;
261     }
262   }
263 }
264 
265 HTMLEndTagComment *Parser::parseHTMLEndTag() {
266   assert(Tok.is(tok::html_end_tag));
267   Token TokEndTag = Tok;
268   consumeToken();
269   SourceLocation Loc;
270   if (Tok.is(tok::html_greater)) {
271     Loc = Tok.getLocation();
272     consumeToken();
273   }
274 
275   return S.actOnHTMLEndTag(TokEndTag.getLocation(),
276                            Loc,
277                            TokEndTag.getHTMLTagEndName());
278 }
279 
280 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
281   SmallVector<InlineContentComment *, 8> Content;
282 
283   while (true) {
284     switch (Tok.getKind()) {
285     case tok::verbatim_block_begin:
286     case tok::verbatim_line_name:
287     case tok::eof:
288       assert(Content.size() != 0);
289       break; // Block content or EOF ahead, finish this parapgaph.
290 
291     case tok::command:
292       if (S.isBlockCommand(Tok.getCommandName())) {
293         if (Content.size() == 0)
294           return parseBlockCommand();
295         break; // Block command ahead, finish this parapgaph.
296       }
297       if (S.isInlineCommand(Tok.getCommandName())) {
298         Content.push_back(parseInlineCommand());
299         continue;
300       }
301 
302       // Not a block command, not an inline command ==> an unknown command.
303       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
304                                               Tok.getEndLocation(),
305                                               Tok.getCommandName()));
306       consumeToken();
307       continue;
308 
309     case tok::newline: {
310       consumeToken();
311       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
312         consumeToken();
313         break; // Two newlines -- end of paragraph.
314       }
315       if (Content.size() > 0)
316         Content.back()->addTrailingNewline();
317       continue;
318     }
319 
320     // Don't deal with HTML tag soup now.
321     case tok::html_start_tag:
322       Content.push_back(parseHTMLStartTag());
323       continue;
324 
325     case tok::html_end_tag:
326       Content.push_back(parseHTMLEndTag());
327       continue;
328 
329     case tok::text:
330       Content.push_back(S.actOnText(Tok.getLocation(),
331                                     Tok.getEndLocation(),
332                                     Tok.getText()));
333       consumeToken();
334       continue;
335 
336     case tok::verbatim_block_line:
337     case tok::verbatim_block_end:
338     case tok::verbatim_line_text:
339     case tok::html_ident:
340     case tok::html_equals:
341     case tok::html_quoted_string:
342     case tok::html_greater:
343     case tok::html_slash_greater:
344       llvm_unreachable("should not see this token");
345     }
346     break;
347   }
348 
349   return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
350 }
351 
352 VerbatimBlockComment *Parser::parseVerbatimBlock() {
353   assert(Tok.is(tok::verbatim_block_begin));
354 
355   VerbatimBlockComment *VB =
356       S.actOnVerbatimBlockStart(Tok.getLocation(),
357                                 Tok.getVerbatimBlockName());
358   consumeToken();
359 
360   // Don't create an empty line if verbatim opening command is followed
361   // by a newline.
362   if (Tok.is(tok::newline))
363     consumeToken();
364 
365   SmallVector<VerbatimBlockLineComment *, 8> Lines;
366   while (Tok.is(tok::verbatim_block_line) ||
367          Tok.is(tok::newline)) {
368     VerbatimBlockLineComment *Line;
369     if (Tok.is(tok::verbatim_block_line)) {
370       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
371                                       Tok.getVerbatimBlockText());
372       consumeToken();
373       if (Tok.is(tok::newline)) {
374         consumeToken();
375       }
376     } else {
377       // Empty line, just a tok::newline.
378       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
379       consumeToken();
380     }
381     Lines.push_back(Line);
382   }
383 
384   if (Tok.is(tok::verbatim_block_end)) {
385     VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
386                                     Tok.getVerbatimBlockName(),
387                                     copyArray(llvm::makeArrayRef(Lines)));
388     consumeToken();
389   } else {
390     // Unterminated \\verbatim block
391     VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
392                                     copyArray(llvm::makeArrayRef(Lines)));
393   }
394 
395   return VB;
396 }
397 
398 VerbatimLineComment *Parser::parseVerbatimLine() {
399   assert(Tok.is(tok::verbatim_line_name));
400 
401   Token NameTok = Tok;
402   consumeToken();
403 
404   SourceLocation TextBegin;
405   StringRef Text;
406   // Next token might not be a tok::verbatim_line_text if verbatim line
407   // starting command comes just before a newline or comment end.
408   if (Tok.is(tok::verbatim_line_text)) {
409     TextBegin = Tok.getLocation();
410     Text = Tok.getVerbatimLineText();
411   } else {
412     TextBegin = NameTok.getEndLocation();
413     Text = "";
414   }
415 
416   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
417                                                 NameTok.getVerbatimLineName(),
418                                                 TextBegin,
419                                                 Text);
420   consumeToken();
421   return VL;
422 }
423 
424 BlockContentComment *Parser::parseBlockContent() {
425   switch (Tok.getKind()) {
426   case tok::text:
427   case tok::command:
428   case tok::html_start_tag:
429   case tok::html_end_tag:
430     return parseParagraphOrBlockCommand();
431 
432   case tok::verbatim_block_begin:
433     return parseVerbatimBlock();
434 
435   case tok::verbatim_line_name:
436     return parseVerbatimLine();
437 
438   case tok::eof:
439   case tok::newline:
440   case tok::verbatim_block_line:
441   case tok::verbatim_block_end:
442   case tok::verbatim_line_text:
443   case tok::html_ident:
444   case tok::html_equals:
445   case tok::html_quoted_string:
446   case tok::html_greater:
447   case tok::html_slash_greater:
448     llvm_unreachable("should not see this token");
449   }
450   llvm_unreachable("bogus token kind");
451 }
452 
453 FullComment *Parser::parseFullComment() {
454   // Skip newlines at the beginning of the comment.
455   while (Tok.is(tok::newline))
456     consumeToken();
457 
458   SmallVector<BlockContentComment *, 8> Blocks;
459   while (Tok.isNot(tok::eof)) {
460     Blocks.push_back(parseBlockContent());
461 
462     // Skip extra newlines after paragraph end.
463     while (Tok.is(tok::newline))
464       consumeToken();
465   }
466   return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
467 }
468 
469 } // end namespace comments
470 } // end namespace clang
471