1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "clang/AST/CommentParser.h" 11 #include "clang/AST/CommentSema.h" 12 #include "clang/AST/CommentDiagnostic.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "llvm/Support/ErrorHandling.h" 15 16 namespace clang { 17 namespace comments { 18 19 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, 20 const SourceManager &SourceMgr, DiagnosticsEngine &Diags): 21 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) { 22 consumeToken(); 23 } 24 25 ParamCommandComment *Parser::parseParamCommandArgs( 26 ParamCommandComment *PC, 27 TextTokenRetokenizer &Retokenizer) { 28 Token Arg; 29 // Check if argument looks like direction specification: [dir] 30 // e.g., [in], [out], [in,out] 31 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) 32 PC = S.actOnParamCommandDirectionArg(PC, 33 Arg.getLocation(), 34 Arg.getEndLocation(), 35 Arg.getText()); 36 37 if (Retokenizer.lexWord(Arg)) 38 PC = S.actOnParamCommandParamNameArg(PC, 39 Arg.getLocation(), 40 Arg.getEndLocation(), 41 Arg.getText()); 42 43 return PC; 44 } 45 46 BlockCommandComment *Parser::parseBlockCommandArgs( 47 BlockCommandComment *BC, 48 TextTokenRetokenizer &Retokenizer, 49 unsigned NumArgs) { 50 typedef BlockCommandComment::Argument Argument; 51 Argument *Args = 52 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs]; 53 unsigned ParsedArgs = 0; 54 Token Arg; 55 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { 56 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), 57 Arg.getEndLocation()), 58 Arg.getText()); 59 ParsedArgs++; 60 } 61 62 return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); 63 } 64 65 BlockCommandComment *Parser::parseBlockCommand() { 66 assert(Tok.is(tok::command)); 67 68 ParamCommandComment *PC; 69 BlockCommandComment *BC; 70 bool IsParam = false; 71 unsigned NumArgs = 0; 72 if (S.isParamCommand(Tok.getCommandName())) { 73 IsParam = true; 74 PC = S.actOnParamCommandStart(Tok.getLocation(), 75 Tok.getEndLocation(), 76 Tok.getCommandName()); 77 } else { 78 NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); 79 BC = S.actOnBlockCommandStart(Tok.getLocation(), 80 Tok.getEndLocation(), 81 Tok.getCommandName()); 82 } 83 consumeToken(); 84 85 if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { 86 // Block command ahead. We can't nest block commands, so pretend that this 87 // command has an empty argument. 88 ParagraphComment *PC = S.actOnParagraphComment( 89 ArrayRef<InlineContentComment *>()); 90 return S.actOnBlockCommandFinish(BC, PC); 91 } 92 93 if (IsParam || NumArgs > 0) { 94 // In order to parse command arguments we need to retokenize a few 95 // following text tokens. 96 TextTokenRetokenizer Retokenizer(Allocator); 97 while (Tok.is(tok::text)) { 98 if (Retokenizer.addToken(Tok)) 99 consumeToken(); 100 } 101 102 if (IsParam) 103 PC = parseParamCommandArgs(PC, Retokenizer); 104 else 105 BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); 106 107 // Put back tokens we didn't use. 108 SmallVector<Token, 16> TextToks; 109 Token Text; 110 while (Retokenizer.lexText(Text)) { 111 TextToks.push_back(Text); 112 } 113 putBack(TextToks); 114 } 115 116 BlockContentComment *Block = parseParagraphOrBlockCommand(); 117 // Since we have checked for a block command, we should have parsed a 118 // paragraph. 119 if (IsParam) 120 return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); 121 else 122 return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); 123 } 124 125 InlineCommandComment *Parser::parseInlineCommand() { 126 assert(Tok.is(tok::command)); 127 128 const Token CommandTok = Tok; 129 consumeToken(); 130 131 TextTokenRetokenizer Retokenizer(Allocator); 132 while (Tok.is(tok::text)) { 133 if (Retokenizer.addToken(Tok)) 134 consumeToken(); 135 } 136 137 Token ArgTok; 138 bool ArgTokValid = Retokenizer.lexWord(ArgTok); 139 140 InlineCommandComment *IC; 141 if (ArgTokValid) { 142 IC = S.actOnInlineCommand(CommandTok.getLocation(), 143 CommandTok.getEndLocation(), 144 CommandTok.getCommandName(), 145 ArgTok.getLocation(), 146 ArgTok.getEndLocation(), 147 ArgTok.getText()); 148 } else { 149 IC = S.actOnInlineCommand(CommandTok.getLocation(), 150 CommandTok.getEndLocation(), 151 CommandTok.getCommandName()); 152 } 153 154 Token Text; 155 while (Retokenizer.lexText(Text)) 156 putBack(Text); 157 158 return IC; 159 } 160 161 HTMLStartTagComment *Parser::parseHTMLStartTag() { 162 assert(Tok.is(tok::html_start_tag)); 163 HTMLStartTagComment *HST = 164 S.actOnHTMLStartTagStart(Tok.getLocation(), 165 Tok.getHTMLTagStartName()); 166 consumeToken(); 167 168 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; 169 while (true) { 170 switch (Tok.getKind()) { 171 case tok::html_ident: { 172 Token Ident = Tok; 173 consumeToken(); 174 if (Tok.isNot(tok::html_equals)) { 175 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 176 Ident.getHTMLIdent())); 177 continue; 178 } 179 Token Equals = Tok; 180 consumeToken(); 181 if (Tok.isNot(tok::html_quoted_string)) { 182 Diag(Tok.getLocation(), 183 diag::warn_doc_html_start_tag_expected_quoted_string) 184 << SourceRange(Equals.getLocation()); 185 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 186 Ident.getHTMLIdent())); 187 while (Tok.is(tok::html_equals) || 188 Tok.is(tok::html_quoted_string)) 189 consumeToken(); 190 continue; 191 } 192 Attrs.push_back(HTMLStartTagComment::Attribute( 193 Ident.getLocation(), 194 Ident.getHTMLIdent(), 195 Equals.getLocation(), 196 SourceRange(Tok.getLocation(), 197 Tok.getEndLocation()), 198 Tok.getHTMLQuotedString())); 199 consumeToken(); 200 continue; 201 } 202 203 case tok::html_greater: 204 HST = S.actOnHTMLStartTagFinish(HST, 205 copyArray(llvm::makeArrayRef(Attrs)), 206 Tok.getLocation(), 207 /* IsSelfClosing = */ false); 208 consumeToken(); 209 return HST; 210 211 case tok::html_slash_greater: 212 HST = S.actOnHTMLStartTagFinish(HST, 213 copyArray(llvm::makeArrayRef(Attrs)), 214 Tok.getLocation(), 215 /* IsSelfClosing = */ true); 216 consumeToken(); 217 return HST; 218 219 case tok::html_equals: 220 case tok::html_quoted_string: 221 Diag(Tok.getLocation(), 222 diag::warn_doc_html_start_tag_expected_ident_or_greater); 223 while (Tok.is(tok::html_equals) || 224 Tok.is(tok::html_quoted_string)) 225 consumeToken(); 226 if (Tok.is(tok::html_ident) || 227 Tok.is(tok::html_greater) || 228 Tok.is(tok::html_slash_greater)) 229 continue; 230 231 return S.actOnHTMLStartTagFinish(HST, 232 copyArray(llvm::makeArrayRef(Attrs)), 233 SourceLocation(), 234 /* IsSelfClosing = */ false); 235 236 default: 237 // Not a token from an HTML start tag. Thus HTML tag prematurely ended. 238 HST = S.actOnHTMLStartTagFinish(HST, 239 copyArray(llvm::makeArrayRef(Attrs)), 240 SourceLocation(), 241 /* IsSelfClosing = */ false); 242 bool StartLineInvalid; 243 const unsigned StartLine = SourceMgr.getPresumedLineNumber( 244 HST->getLocation(), 245 &StartLineInvalid); 246 bool EndLineInvalid; 247 const unsigned EndLine = SourceMgr.getPresumedLineNumber( 248 Tok.getLocation(), 249 &EndLineInvalid); 250 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) 251 Diag(Tok.getLocation(), 252 diag::warn_doc_html_start_tag_expected_ident_or_greater) 253 << HST->getSourceRange(); 254 else { 255 Diag(Tok.getLocation(), 256 diag::warn_doc_html_start_tag_expected_ident_or_greater); 257 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) 258 << HST->getSourceRange(); 259 } 260 return HST; 261 } 262 } 263 } 264 265 HTMLEndTagComment *Parser::parseHTMLEndTag() { 266 assert(Tok.is(tok::html_end_tag)); 267 Token TokEndTag = Tok; 268 consumeToken(); 269 SourceLocation Loc; 270 if (Tok.is(tok::html_greater)) { 271 Loc = Tok.getLocation(); 272 consumeToken(); 273 } 274 275 return S.actOnHTMLEndTag(TokEndTag.getLocation(), 276 Loc, 277 TokEndTag.getHTMLTagEndName()); 278 } 279 280 BlockContentComment *Parser::parseParagraphOrBlockCommand() { 281 SmallVector<InlineContentComment *, 8> Content; 282 283 while (true) { 284 switch (Tok.getKind()) { 285 case tok::verbatim_block_begin: 286 case tok::verbatim_line_name: 287 case tok::eof: 288 assert(Content.size() != 0); 289 break; // Block content or EOF ahead, finish this parapgaph. 290 291 case tok::command: 292 if (S.isBlockCommand(Tok.getCommandName())) { 293 if (Content.size() == 0) 294 return parseBlockCommand(); 295 break; // Block command ahead, finish this parapgaph. 296 } 297 if (S.isInlineCommand(Tok.getCommandName())) { 298 Content.push_back(parseInlineCommand()); 299 continue; 300 } 301 302 // Not a block command, not an inline command ==> an unknown command. 303 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 304 Tok.getEndLocation(), 305 Tok.getCommandName())); 306 consumeToken(); 307 continue; 308 309 case tok::newline: { 310 consumeToken(); 311 if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 312 consumeToken(); 313 break; // Two newlines -- end of paragraph. 314 } 315 if (Content.size() > 0) 316 Content.back()->addTrailingNewline(); 317 continue; 318 } 319 320 // Don't deal with HTML tag soup now. 321 case tok::html_start_tag: 322 Content.push_back(parseHTMLStartTag()); 323 continue; 324 325 case tok::html_end_tag: 326 Content.push_back(parseHTMLEndTag()); 327 continue; 328 329 case tok::text: 330 Content.push_back(S.actOnText(Tok.getLocation(), 331 Tok.getEndLocation(), 332 Tok.getText())); 333 consumeToken(); 334 continue; 335 336 case tok::verbatim_block_line: 337 case tok::verbatim_block_end: 338 case tok::verbatim_line_text: 339 case tok::html_ident: 340 case tok::html_equals: 341 case tok::html_quoted_string: 342 case tok::html_greater: 343 case tok::html_slash_greater: 344 llvm_unreachable("should not see this token"); 345 } 346 break; 347 } 348 349 return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); 350 } 351 352 VerbatimBlockComment *Parser::parseVerbatimBlock() { 353 assert(Tok.is(tok::verbatim_block_begin)); 354 355 VerbatimBlockComment *VB = 356 S.actOnVerbatimBlockStart(Tok.getLocation(), 357 Tok.getVerbatimBlockName()); 358 consumeToken(); 359 360 // Don't create an empty line if verbatim opening command is followed 361 // by a newline. 362 if (Tok.is(tok::newline)) 363 consumeToken(); 364 365 SmallVector<VerbatimBlockLineComment *, 8> Lines; 366 while (Tok.is(tok::verbatim_block_line) || 367 Tok.is(tok::newline)) { 368 VerbatimBlockLineComment *Line; 369 if (Tok.is(tok::verbatim_block_line)) { 370 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 371 Tok.getVerbatimBlockText()); 372 consumeToken(); 373 if (Tok.is(tok::newline)) { 374 consumeToken(); 375 } 376 } else { 377 // Empty line, just a tok::newline. 378 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); 379 consumeToken(); 380 } 381 Lines.push_back(Line); 382 } 383 384 if (Tok.is(tok::verbatim_block_end)) { 385 VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), 386 Tok.getVerbatimBlockName(), 387 copyArray(llvm::makeArrayRef(Lines))); 388 consumeToken(); 389 } else { 390 // Unterminated \\verbatim block 391 VB = S.actOnVerbatimBlockFinish(VB, SourceLocation(), "", 392 copyArray(llvm::makeArrayRef(Lines))); 393 } 394 395 return VB; 396 } 397 398 VerbatimLineComment *Parser::parseVerbatimLine() { 399 assert(Tok.is(tok::verbatim_line_name)); 400 401 Token NameTok = Tok; 402 consumeToken(); 403 404 SourceLocation TextBegin; 405 StringRef Text; 406 // Next token might not be a tok::verbatim_line_text if verbatim line 407 // starting command comes just before a newline or comment end. 408 if (Tok.is(tok::verbatim_line_text)) { 409 TextBegin = Tok.getLocation(); 410 Text = Tok.getVerbatimLineText(); 411 } else { 412 TextBegin = NameTok.getEndLocation(); 413 Text = ""; 414 } 415 416 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), 417 NameTok.getVerbatimLineName(), 418 TextBegin, 419 Text); 420 consumeToken(); 421 return VL; 422 } 423 424 BlockContentComment *Parser::parseBlockContent() { 425 switch (Tok.getKind()) { 426 case tok::text: 427 case tok::command: 428 case tok::html_start_tag: 429 case tok::html_end_tag: 430 return parseParagraphOrBlockCommand(); 431 432 case tok::verbatim_block_begin: 433 return parseVerbatimBlock(); 434 435 case tok::verbatim_line_name: 436 return parseVerbatimLine(); 437 438 case tok::eof: 439 case tok::newline: 440 case tok::verbatim_block_line: 441 case tok::verbatim_block_end: 442 case tok::verbatim_line_text: 443 case tok::html_ident: 444 case tok::html_equals: 445 case tok::html_quoted_string: 446 case tok::html_greater: 447 case tok::html_slash_greater: 448 llvm_unreachable("should not see this token"); 449 } 450 llvm_unreachable("bogus token kind"); 451 } 452 453 FullComment *Parser::parseFullComment() { 454 // Skip newlines at the beginning of the comment. 455 while (Tok.is(tok::newline)) 456 consumeToken(); 457 458 SmallVector<BlockContentComment *, 8> Blocks; 459 while (Tok.isNot(tok::eof)) { 460 Blocks.push_back(parseBlockContent()); 461 462 // Skip extra newlines after paragraph end. 463 while (Tok.is(tok::newline)) 464 consumeToken(); 465 } 466 return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); 467 } 468 469 } // end namespace comments 470 } // end namespace clang 471