1 //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "clang/AST/CommentParser.h" 11 #include "clang/AST/CommentSema.h" 12 #include "clang/AST/CommentDiagnostic.h" 13 #include "clang/Basic/SourceManager.h" 14 #include "llvm/Support/ErrorHandling.h" 15 16 namespace clang { 17 namespace comments { 18 19 Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, 20 const SourceManager &SourceMgr, DiagnosticsEngine &Diags): 21 L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags) { 22 consumeToken(); 23 } 24 25 ParamCommandComment *Parser::parseParamCommandArgs( 26 ParamCommandComment *PC, 27 TextTokenRetokenizer &Retokenizer) { 28 Token Arg; 29 // Check if argument looks like direction specification: [dir] 30 // e.g., [in], [out], [in,out] 31 if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) 32 PC = S.actOnParamCommandDirectionArg(PC, 33 Arg.getLocation(), 34 Arg.getEndLocation(), 35 Arg.getText()); 36 37 if (Retokenizer.lexWord(Arg)) 38 PC = S.actOnParamCommandParamNameArg(PC, 39 Arg.getLocation(), 40 Arg.getEndLocation(), 41 Arg.getText()); 42 43 return PC; 44 } 45 46 BlockCommandComment *Parser::parseBlockCommandArgs( 47 BlockCommandComment *BC, 48 TextTokenRetokenizer &Retokenizer, 49 unsigned NumArgs) { 50 typedef BlockCommandComment::Argument Argument; 51 Argument *Args = 52 new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs]; 53 unsigned ParsedArgs = 0; 54 Token Arg; 55 while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { 56 Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(), 57 Arg.getEndLocation()), 58 Arg.getText()); 59 ParsedArgs++; 60 } 61 62 return S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs)); 63 } 64 65 BlockCommandComment *Parser::parseBlockCommand() { 66 assert(Tok.is(tok::command)); 67 68 ParamCommandComment *PC; 69 BlockCommandComment *BC; 70 bool IsParam = false; 71 unsigned NumArgs = 0; 72 if (S.isParamCommand(Tok.getCommandName())) { 73 IsParam = true; 74 PC = S.actOnParamCommandStart(Tok.getLocation(), 75 Tok.getEndLocation(), 76 Tok.getCommandName()); 77 } else { 78 NumArgs = S.getBlockCommandNumArgs(Tok.getCommandName()); 79 BC = S.actOnBlockCommandStart(Tok.getLocation(), 80 Tok.getEndLocation(), 81 Tok.getCommandName()); 82 } 83 consumeToken(); 84 85 if (Tok.is(tok::command) && S.isBlockCommand(Tok.getCommandName())) { 86 // Block command ahead. We can't nest block commands, so pretend that this 87 // command has an empty argument. 88 ParagraphComment *PC = S.actOnParagraphComment( 89 ArrayRef<InlineContentComment *>()); 90 return S.actOnBlockCommandFinish(BC, PC); 91 } 92 93 if (IsParam || NumArgs > 0) { 94 // In order to parse command arguments we need to retokenize a few 95 // following text tokens. 96 TextTokenRetokenizer Retokenizer(Allocator); 97 while (Tok.is(tok::text)) { 98 if (Retokenizer.addToken(Tok)) 99 consumeToken(); 100 } 101 102 if (IsParam) 103 PC = parseParamCommandArgs(PC, Retokenizer); 104 else 105 BC = parseBlockCommandArgs(BC, Retokenizer, NumArgs); 106 107 // Put back tokens we didn't use. 108 Token Text; 109 while (Retokenizer.lexText(Text)) 110 putBack(Text); 111 } 112 113 BlockContentComment *Block = parseParagraphOrBlockCommand(); 114 // Since we have checked for a block command, we should have parsed a 115 // paragraph. 116 if (IsParam) 117 return S.actOnParamCommandFinish(PC, cast<ParagraphComment>(Block)); 118 else 119 return S.actOnBlockCommandFinish(BC, cast<ParagraphComment>(Block)); 120 } 121 122 InlineCommandComment *Parser::parseInlineCommand() { 123 assert(Tok.is(tok::command)); 124 125 const Token CommandTok = Tok; 126 consumeToken(); 127 128 TextTokenRetokenizer Retokenizer(Allocator); 129 while (Tok.is(tok::text)) { 130 if (Retokenizer.addToken(Tok)) 131 consumeToken(); 132 } 133 134 Token ArgTok; 135 bool ArgTokValid = Retokenizer.lexWord(ArgTok); 136 137 InlineCommandComment *IC; 138 if (ArgTokValid) { 139 IC = S.actOnInlineCommand(CommandTok.getLocation(), 140 CommandTok.getEndLocation(), 141 CommandTok.getCommandName(), 142 ArgTok.getLocation(), 143 ArgTok.getEndLocation(), 144 ArgTok.getText()); 145 } else { 146 IC = S.actOnInlineCommand(CommandTok.getLocation(), 147 CommandTok.getEndLocation(), 148 CommandTok.getCommandName()); 149 } 150 151 Token Text; 152 while (Retokenizer.lexText(Text)) 153 putBack(Text); 154 155 return IC; 156 } 157 158 HTMLStartTagComment *Parser::parseHTMLStartTag() { 159 assert(Tok.is(tok::html_start_tag)); 160 HTMLStartTagComment *HST = 161 S.actOnHTMLStartTagStart(Tok.getLocation(), 162 Tok.getHTMLTagStartName()); 163 consumeToken(); 164 165 SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; 166 while (true) { 167 switch (Tok.getKind()) { 168 case tok::html_ident: { 169 Token Ident = Tok; 170 consumeToken(); 171 if (Tok.isNot(tok::html_equals)) { 172 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 173 Ident.getHTMLIdent())); 174 continue; 175 } 176 Token Equals = Tok; 177 consumeToken(); 178 if (Tok.isNot(tok::html_quoted_string)) { 179 Diag(Tok.getLocation(), 180 diag::warn_doc_html_start_tag_expected_quoted_string) 181 << SourceRange(Equals.getLocation()); 182 Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 183 Ident.getHTMLIdent())); 184 while (Tok.is(tok::html_equals) || 185 Tok.is(tok::html_quoted_string)) 186 consumeToken(); 187 continue; 188 } 189 Attrs.push_back(HTMLStartTagComment::Attribute( 190 Ident.getLocation(), 191 Ident.getHTMLIdent(), 192 Equals.getLocation(), 193 SourceRange(Tok.getLocation(), 194 Tok.getEndLocation()), 195 Tok.getHTMLQuotedString())); 196 consumeToken(); 197 continue; 198 } 199 200 case tok::html_greater: 201 HST = S.actOnHTMLStartTagFinish(HST, 202 copyArray(llvm::makeArrayRef(Attrs)), 203 Tok.getLocation(), 204 /* IsSelfClosing = */ false); 205 consumeToken(); 206 return HST; 207 208 case tok::html_slash_greater: 209 HST = S.actOnHTMLStartTagFinish(HST, 210 copyArray(llvm::makeArrayRef(Attrs)), 211 Tok.getLocation(), 212 /* IsSelfClosing = */ true); 213 consumeToken(); 214 return HST; 215 216 case tok::html_equals: 217 case tok::html_quoted_string: 218 Diag(Tok.getLocation(), 219 diag::warn_doc_html_start_tag_expected_ident_or_greater); 220 while (Tok.is(tok::html_equals) || 221 Tok.is(tok::html_quoted_string)) 222 consumeToken(); 223 if (Tok.is(tok::html_ident) || 224 Tok.is(tok::html_greater) || 225 Tok.is(tok::html_slash_greater)) 226 continue; 227 228 return S.actOnHTMLStartTagFinish(HST, 229 copyArray(llvm::makeArrayRef(Attrs)), 230 SourceLocation(), 231 /* IsSelfClosing = */ false); 232 233 default: 234 // Not a token from an HTML start tag. Thus HTML tag prematurely ended. 235 HST = S.actOnHTMLStartTagFinish(HST, 236 copyArray(llvm::makeArrayRef(Attrs)), 237 SourceLocation(), 238 /* IsSelfClosing = */ false); 239 bool StartLineInvalid; 240 const unsigned StartLine = SourceMgr.getPresumedLineNumber( 241 HST->getLocation(), 242 &StartLineInvalid); 243 bool EndLineInvalid; 244 const unsigned EndLine = SourceMgr.getPresumedLineNumber( 245 Tok.getLocation(), 246 &EndLineInvalid); 247 if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) 248 Diag(Tok.getLocation(), 249 diag::warn_doc_html_start_tag_expected_ident_or_greater) 250 << HST->getSourceRange(); 251 else { 252 Diag(Tok.getLocation(), 253 diag::warn_doc_html_start_tag_expected_ident_or_greater); 254 Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) 255 << HST->getSourceRange(); 256 } 257 return HST; 258 } 259 } 260 } 261 262 HTMLEndTagComment *Parser::parseHTMLEndTag() { 263 assert(Tok.is(tok::html_end_tag)); 264 Token TokEndTag = Tok; 265 consumeToken(); 266 SourceLocation Loc; 267 if (Tok.is(tok::html_greater)) { 268 Loc = Tok.getLocation(); 269 consumeToken(); 270 } 271 272 return S.actOnHTMLEndTag(TokEndTag.getLocation(), 273 Loc, 274 TokEndTag.getHTMLTagEndName()); 275 } 276 277 BlockContentComment *Parser::parseParagraphOrBlockCommand() { 278 SmallVector<InlineContentComment *, 8> Content; 279 280 while (true) { 281 switch (Tok.getKind()) { 282 case tok::verbatim_block_begin: 283 case tok::verbatim_line_name: 284 case tok::eof: 285 assert(Content.size() != 0); 286 break; // Block content or EOF ahead, finish this parapgaph. 287 288 case tok::command: 289 if (S.isBlockCommand(Tok.getCommandName())) { 290 if (Content.size() == 0) 291 return parseBlockCommand(); 292 break; // Block command ahead, finish this parapgaph. 293 } 294 if (S.isInlineCommand(Tok.getCommandName())) { 295 Content.push_back(parseInlineCommand()); 296 continue; 297 } 298 299 // Not a block command, not an inline command ==> an unknown command. 300 Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 301 Tok.getEndLocation(), 302 Tok.getCommandName())); 303 consumeToken(); 304 continue; 305 306 case tok::newline: { 307 consumeToken(); 308 if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 309 consumeToken(); 310 break; // Two newlines -- end of paragraph. 311 } 312 if (Content.size() > 0) 313 Content.back()->addTrailingNewline(); 314 continue; 315 } 316 317 // Don't deal with HTML tag soup now. 318 case tok::html_start_tag: 319 Content.push_back(parseHTMLStartTag()); 320 continue; 321 322 case tok::html_end_tag: 323 Content.push_back(parseHTMLEndTag()); 324 continue; 325 326 case tok::text: 327 Content.push_back(S.actOnText(Tok.getLocation(), 328 Tok.getEndLocation(), 329 Tok.getText())); 330 consumeToken(); 331 continue; 332 333 case tok::verbatim_block_line: 334 case tok::verbatim_block_end: 335 case tok::verbatim_line_text: 336 case tok::html_ident: 337 case tok::html_equals: 338 case tok::html_quoted_string: 339 case tok::html_greater: 340 case tok::html_slash_greater: 341 llvm_unreachable("should not see this token"); 342 } 343 break; 344 } 345 346 return S.actOnParagraphComment(copyArray(llvm::makeArrayRef(Content))); 347 } 348 349 VerbatimBlockComment *Parser::parseVerbatimBlock() { 350 assert(Tok.is(tok::verbatim_block_begin)); 351 352 VerbatimBlockComment *VB = 353 S.actOnVerbatimBlockStart(Tok.getLocation(), 354 Tok.getVerbatimBlockName()); 355 consumeToken(); 356 357 // Don't create an empty line if verbatim opening command is followed 358 // by a newline. 359 if (Tok.is(tok::newline)) 360 consumeToken(); 361 362 SmallVector<VerbatimBlockLineComment *, 8> Lines; 363 while (Tok.is(tok::verbatim_block_line) || 364 Tok.is(tok::newline)) { 365 VerbatimBlockLineComment *Line; 366 if (Tok.is(tok::verbatim_block_line)) { 367 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 368 Tok.getVerbatimBlockText()); 369 consumeToken(); 370 if (Tok.is(tok::newline)) { 371 consumeToken(); 372 } 373 } else { 374 // Empty line, just a tok::newline. 375 Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); 376 consumeToken(); 377 } 378 Lines.push_back(Line); 379 } 380 381 assert(Tok.is(tok::verbatim_block_end)); 382 VB = S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), 383 Tok.getVerbatimBlockName(), 384 copyArray(llvm::makeArrayRef(Lines))); 385 consumeToken(); 386 387 return VB; 388 } 389 390 VerbatimLineComment *Parser::parseVerbatimLine() { 391 assert(Tok.is(tok::verbatim_line_name)); 392 393 Token NameTok = Tok; 394 consumeToken(); 395 396 SourceLocation TextBegin; 397 StringRef Text; 398 // Next token might not be a tok::verbatim_line_text if verbatim line 399 // starting command comes just before a newline or comment end. 400 if (Tok.is(tok::verbatim_line_text)) { 401 TextBegin = Tok.getLocation(); 402 Text = Tok.getVerbatimLineText(); 403 } else { 404 TextBegin = NameTok.getEndLocation(); 405 Text = ""; 406 } 407 408 VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), 409 NameTok.getVerbatimLineName(), 410 TextBegin, 411 Text); 412 consumeToken(); 413 return VL; 414 } 415 416 BlockContentComment *Parser::parseBlockContent() { 417 switch (Tok.getKind()) { 418 case tok::text: 419 case tok::command: 420 case tok::html_start_tag: 421 case tok::html_end_tag: 422 return parseParagraphOrBlockCommand(); 423 424 case tok::verbatim_block_begin: 425 return parseVerbatimBlock(); 426 427 case tok::verbatim_line_name: 428 return parseVerbatimLine(); 429 430 case tok::eof: 431 case tok::newline: 432 case tok::verbatim_block_line: 433 case tok::verbatim_block_end: 434 case tok::verbatim_line_text: 435 case tok::html_ident: 436 case tok::html_equals: 437 case tok::html_quoted_string: 438 case tok::html_greater: 439 case tok::html_slash_greater: 440 llvm_unreachable("should not see this token"); 441 } 442 llvm_unreachable("bogus token kind"); 443 } 444 445 FullComment *Parser::parseFullComment() { 446 // Skip newlines at the beginning of the comment. 447 while (Tok.is(tok::newline)) 448 consumeToken(); 449 450 SmallVector<BlockContentComment *, 8> Blocks; 451 while (Tok.isNot(tok::eof)) { 452 Blocks.push_back(parseBlockContent()); 453 454 // Skip extra newlines after paragraph end. 455 while (Tok.is(tok::newline)) 456 consumeToken(); 457 } 458 return S.actOnFullComment(copyArray(llvm::makeArrayRef(Blocks))); 459 } 460 461 } // end namespace comments 462 } // end namespace clang 463