xref: /llvm-project/clang/lib/AST/CommentParser.cpp (revision b03cc7e9f443b56e3048c72d55d6cbed5b83d6ab)
1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "clang/AST/CommentParser.h"
11 #include "clang/AST/CommentSema.h"
12 #include "clang/AST/CommentDiagnostic.h"
13 #include "clang/Basic/SourceManager.h"
14 #include "llvm/Support/ErrorHandling.h"
15 
16 namespace clang {
17 namespace comments {
18 
19 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
20                const SourceManager &SourceMgr, DiagnosticsEngine &Diags):
21     L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) {
22   consumeToken();
23 }
24 
25 ParamCommandComment *Parser::parseParamCommandArgs(
26     ParamCommandComment *PC,
27     TextTokenRetokenizer &Retokenizer) {
28   Token Arg;
29   // Check if argument looks like direction specification: [dir]
30   // e.g., [in], [out], [in,out]
31   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
32     PC = S.actOnParamCommandDirectionArg(PC,
33                                          Arg.getLocation(),
34                                          Arg.getEndLocation(),
35                                          Arg.getText());
36 
37   if (Retokenizer.lexWord(Arg))
38     PC = S.actOnParamCommandParamNameArg(PC,
39                                          Arg.getLocation(),
40                                          Arg.getEndLocation(),
41                                          Arg.getText());
42 
43   return PC;
44 }
45 
46 BlockCommandComment *Parser::parseBlockCommandArgs(
47     BlockCommandComment *BC,
48     TextTokenRetokenizer &Retokenizer,
49     unsigned NumArgs) {
50   typedef BlockCommandComment::Argument Argument;
51   Argument *Args =
52       new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
53   unsigned ParsedArgs = 0;
54   Token Arg;
55   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
56     Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
57                                             Arg.getEndLocation()),
58                                 Arg.getText());
59     ParsedArgs++;
60   }
61 
62   return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
63 }
64 
65 BlockCommandComment *Parser::parseBlockCommand() {
66   assert(Tok.is(tok::command));
67 
68   ParamCommandComment *PC;
69   BlockCommandComment *BC;
70   bool IsParam = false;
71   unsigned NumArgs = 0;
72   if (S.isParamCommand(Tok.getCommandName())) {
73     IsParam = true;
74     PC = S.actOnParamCommandStart(Tok.getLocation(),
75                                   Tok.getEndLocation(),
76                                   Tok.getCommandName());
77   } else {
78     NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName());
79     BC = S.actOnBlockCommandStart(Tok.getLocation(),
80                                   Tok.getEndLocation(),
81                                   Tok.getCommandName());
82   }
83   consumeToken();
84 
85   if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) {
86     // Block command ahead.  We can't nest block commands, so pretend that this
87     // command has an empty argument.
88     ParagraphComment *PC = S.actOnParagraphComment(
89                                 ArrayRef<InlineContentComment *>());
90     return S.actOnBlockCommandFinish(BC, PC);
91   }
92 
93   if (IsParam || NumArgs > 0) {
94     // In order to parse command arguments we need to retokenize a few
95     // following text tokens.
96     TextTokenRetokenizer Retokenizer(Allocator);
97     while (Tok.is(tok::text)) {
98       if (Retokenizer.addToken(Tok))
99         consumeToken();
100     }
101 
102     if (IsParam)
103       PC = parseParamCommandArgs(PC, Retokenizer);
104     else
105       BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs);
106 
107     // Put back tokens we didn't use.
108     Token Text;
109     while (Retokenizer.lexText(Text))
110       putBack(Text);
111   }
112 
113   BlockContentComment *Block = parseParagraphOrBlockCommand();
114   // Since we have checked for a block command, we should have parsed a
115   // paragraph.
116   if (IsParam)
117     return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block));
118   else
119     return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block));
120 }
121 
122 InlineCommandComment *Parser::parseInlineCommand() {
123   assert(Tok.is(tok::command));
124 
125   const Token CommandTok = Tok;
126   consumeToken();
127 
128   TextTokenRetokenizer Retokenizer(Allocator);
129   while (Tok.is(tok::text)) {
130     if (Retokenizer.addToken(Tok))
131       consumeToken();
132   }
133 
134   Token ArgTok;
135   bool ArgTokValid = Retokenizer.lexWord(ArgTok);
136 
137   InlineCommandComment *IC;
138   if (ArgTokValid) {
139     IC = S.actOnInlineCommand(CommandTok.getLocation(),
140                               CommandTok.getEndLocation(),
141                               CommandTok.getCommandName(),
142                               ArgTok.getLocation(),
143                               ArgTok.getEndLocation(),
144                               ArgTok.getText());
145   } else {
146     IC = S.actOnInlineCommand(CommandTok.getLocation(),
147                               CommandTok.getEndLocation(),
148                               CommandTok.getCommandName());
149   }
150 
151   Token Text;
152   while (Retokenizer.lexText(Text))
153     putBack(Text);
154 
155   return IC;
156 }
157 
158 HTMLStartTagComment *Parser::parseHTMLStartTag() {
159   assert(Tok.is(tok::html_start_tag));
160   HTMLStartTagComment *HST =
161       S.actOnHTMLStartTagStart(Tok.getLocation(),
162                                Tok.getHTMLTagStartName());
163   consumeToken();
164 
165   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
166   while (true) {
167     switch (Tok.getKind()) {
168     case tok::html_ident: {
169       Token Ident = Tok;
170       consumeToken();
171       if (Tok.isNot(tok::html_equals)) {
172         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
173                                                        Ident.getHTMLIdent()));
174         continue;
175       }
176       Token Equals = Tok;
177       consumeToken();
178       if (Tok.isNot(tok::html_quoted_string)) {
179         Diag(Tok.getLocation(),
180              diag::warn_doc_html_start_tag_expected_quoted_string)
181           << SourceRange(Equals.getLocation());
182         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
183                                                        Ident.getHTMLIdent()));
184         while (Tok.is(tok::html_equals) ||
185                Tok.is(tok::html_quoted_string))
186           consumeToken();
187         continue;
188       }
189       Attrs.push_back(HTMLStartTagComment::Attribute(
190                               Ident.getLocation(),
191                               Ident.getHTMLIdent(),
192                               Equals.getLocation(),
193                               SourceRange(Tok.getLocation(),
194                                           Tok.getEndLocation()),
195                               Tok.getHTMLQuotedString()));
196       consumeToken();
197       continue;
198     }
199 
200     case tok::html_greater:
201       HST = S.actOnHTMLStartTagFinish(HST,
202                                       copyArray(llvm::makeArrayRef(Attrs)),
203                                       Tok.getLocation(),
204                                       /* IsSelfClosing = */ false);
205       consumeToken();
206       return HST;
207 
208     case tok::html_slash_greater:
209       HST = S.actOnHTMLStartTagFinish(HST,
210                                       copyArray(llvm::makeArrayRef(Attrs)),
211                                       Tok.getLocation(),
212                                       /* IsSelfClosing = */ true);
213       consumeToken();
214       return HST;
215 
216     case tok::html_equals:
217     case tok::html_quoted_string:
218       Diag(Tok.getLocation(),
219            diag::warn_doc_html_start_tag_expected_ident_or_greater);
220       while (Tok.is(tok::html_equals) ||
221              Tok.is(tok::html_quoted_string))
222         consumeToken();
223       if (Tok.is(tok::html_ident) ||
224           Tok.is(tok::html_greater) ||
225           Tok.is(tok::html_slash_greater))
226         continue;
227 
228       return S.actOnHTMLStartTagFinish(HST,
229                                        copyArray(llvm::makeArrayRef(Attrs)),
230                                        SourceLocation(),
231                                        /* IsSelfClosing = */ false);
232 
233     default:
234       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
235       HST = S.actOnHTMLStartTagFinish(HST,
236                                       copyArray(llvm::makeArrayRef(Attrs)),
237                                       SourceLocation(),
238                                       /* IsSelfClosing = */ false);
239       bool StartLineInvalid;
240       const unsigned StartLine = SourceMgr.getPresumedLineNumber(
241                                                   HST->getLocation(),
242                                                   &StartLineInvalid);
243       bool EndLineInvalid;
244       const unsigned EndLine = SourceMgr.getPresumedLineNumber(
245                                                   Tok.getLocation(),
246                                                   &EndLineInvalid);
247       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
248         Diag(Tok.getLocation(),
249              diag::warn_doc_html_start_tag_expected_ident_or_greater)
250           << HST->getSourceRange();
251       else {
252         Diag(Tok.getLocation(),
253              diag::warn_doc_html_start_tag_expected_ident_or_greater);
254         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
255           << HST->getSourceRange();
256       }
257       return HST;
258     }
259   }
260 }
261 
262 HTMLEndTagComment *Parser::parseHTMLEndTag() {
263   assert(Tok.is(tok::html_end_tag));
264   Token TokEndTag = Tok;
265   consumeToken();
266   SourceLocation Loc;
267   if (Tok.is(tok::html_greater)) {
268     Loc = Tok.getLocation();
269     consumeToken();
270   }
271 
272   return S.actOnHTMLEndTag(TokEndTag.getLocation(),
273                            Loc,
274                            TokEndTag.getHTMLTagEndName());
275 }
276 
277 BlockContentComment *Parser::parseParagraphOrBlockCommand() {
278   SmallVector<InlineContentComment *, 8> Content;
279 
280   while (true) {
281     switch (Tok.getKind()) {
282     case tok::verbatim_block_begin:
283     case tok::verbatim_line_name:
284     case tok::eof:
285       assert(Content.size() != 0);
286       break; // Block content or EOF ahead, finish this parapgaph.
287 
288     case tok::command:
289       if (S.isBlockCommand(Tok.getCommandName())) {
290         if (Content.size() == 0)
291           return parseBlockCommand();
292         break; // Block command ahead, finish this parapgaph.
293       }
294       if (S.isInlineCommand(Tok.getCommandName())) {
295         Content.push_back(parseInlineCommand());
296         continue;
297       }
298 
299       // Not a block command, not an inline command ==> an unknown command.
300       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
301                                               Tok.getEndLocation(),
302                                               Tok.getCommandName()));
303       consumeToken();
304       continue;
305 
306     case tok::newline: {
307       consumeToken();
308       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
309         consumeToken();
310         break; // Two newlines -- end of paragraph.
311       }
312       if (Content.size() > 0)
313         Content.back()->addTrailingNewline();
314       continue;
315     }
316 
317     // Don't deal with HTML tag soup now.
318     case tok::html_start_tag:
319       Content.push_back(parseHTMLStartTag());
320       continue;
321 
322     case tok::html_end_tag:
323       Content.push_back(parseHTMLEndTag());
324       continue;
325 
326     case tok::text:
327       Content.push_back(S.actOnText(Tok.getLocation(),
328                                     Tok.getEndLocation(),
329                                     Tok.getText()));
330       consumeToken();
331       continue;
332 
333     case tok::verbatim_block_line:
334     case tok::verbatim_block_end:
335     case tok::verbatim_line_text:
336     case tok::html_ident:
337     case tok::html_equals:
338     case tok::html_quoted_string:
339     case tok::html_greater:
340     case tok::html_slash_greater:
341       llvm_unreachable("should not see this token");
342     }
343     break;
344   }
345 
346   return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content)));
347 }
348 
349 VerbatimBlockComment *Parser::parseVerbatimBlock() {
350   assert(Tok.is(tok::verbatim_block_begin));
351 
352   VerbatimBlockComment *VB =
353       S.actOnVerbatimBlockStart(Tok.getLocation(),
354                                 Tok.getVerbatimBlockName());
355   consumeToken();
356 
357   // Don't create an empty line if verbatim opening command is followed
358   // by a newline.
359   if (Tok.is(tok::newline))
360     consumeToken();
361 
362   SmallVector<VerbatimBlockLineComment *, 8> Lines;
363   while (Tok.is(tok::verbatim_block_line) ||
364          Tok.is(tok::newline)) {
365     VerbatimBlockLineComment *Line;
366     if (Tok.is(tok::verbatim_block_line)) {
367       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
368                                       Tok.getVerbatimBlockText());
369       consumeToken();
370       if (Tok.is(tok::newline)) {
371         consumeToken();
372       }
373     } else {
374       // Empty line, just a tok::newline.
375       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
376       consumeToken();
377     }
378     Lines.push_back(Line);
379   }
380 
381   assert(Tok.is(tok::verbatim_block_end));
382   VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
383                                   Tok.getVerbatimBlockName(),
384                                   copyArray(llvm::makeArrayRef(Lines)));
385   consumeToken();
386 
387   return VB;
388 }
389 
390 VerbatimLineComment *Parser::parseVerbatimLine() {
391   assert(Tok.is(tok::verbatim_line_name));
392 
393   Token NameTok = Tok;
394   consumeToken();
395 
396   SourceLocation TextBegin;
397   StringRef Text;
398   // Next token might not be a tok::verbatim_line_text if verbatim line
399   // starting command comes just before a newline or comment end.
400   if (Tok.is(tok::verbatim_line_text)) {
401     TextBegin = Tok.getLocation();
402     Text = Tok.getVerbatimLineText();
403   } else {
404     TextBegin = NameTok.getEndLocation();
405     Text = "";
406   }
407 
408   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
409                                                 NameTok.getVerbatimLineName(),
410                                                 TextBegin,
411                                                 Text);
412   consumeToken();
413   return VL;
414 }
415 
416 BlockContentComment *Parser::parseBlockContent() {
417   switch (Tok.getKind()) {
418   case tok::text:
419   case tok::command:
420   case tok::html_start_tag:
421   case tok::html_end_tag:
422     return parseParagraphOrBlockCommand();
423 
424   case tok::verbatim_block_begin:
425     return parseVerbatimBlock();
426 
427   case tok::verbatim_line_name:
428     return parseVerbatimLine();
429 
430   case tok::eof:
431   case tok::newline:
432   case tok::verbatim_block_line:
433   case tok::verbatim_block_end:
434   case tok::verbatim_line_text:
435   case tok::html_ident:
436   case tok::html_equals:
437   case tok::html_quoted_string:
438   case tok::html_greater:
439   case tok::html_slash_greater:
440     llvm_unreachable("should not see this token");
441   }
442   llvm_unreachable("bogus token kind");
443 }
444 
445 FullComment *Parser::parseFullComment() {
446   // Skip newlines at the beginning of the comment.
447   while (Tok.is(tok::newline))
448     consumeToken();
449 
450   SmallVector<BlockContentComment *, 8> Blocks;
451   while (Tok.isNot(tok::eof)) {
452     Blocks.push_back(parseBlockContent());
453 
454     // Skip extra newlines after paragraph end.
455     while (Tok.is(tok::newline))
456       consumeToken();
457   }
458   return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks)));
459 }
460 
461 } // end namespace comments
462 } // end namespace clang
463