10b57cec5SDimitry Andric //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "clang/AST/CommentParser.h" 100b57cec5SDimitry Andric #include "clang/AST/CommentCommandTraits.h" 110b57cec5SDimitry Andric #include "clang/AST/CommentDiagnostic.h" 120b57cec5SDimitry Andric #include "clang/AST/CommentSema.h" 130b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h" 140b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h" 150b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric namespace clang { 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric static inline bool isWhitespace(llvm::StringRef S) { 200b57cec5SDimitry Andric for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) { 210b57cec5SDimitry Andric if (!isWhitespace(*I)) 220b57cec5SDimitry Andric return false; 230b57cec5SDimitry Andric } 240b57cec5SDimitry Andric return true; 250b57cec5SDimitry Andric } 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric namespace comments { 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric /// Re-lexes a sequence of tok::text tokens. 300b57cec5SDimitry Andric class TextTokenRetokenizer { 310b57cec5SDimitry Andric llvm::BumpPtrAllocator &Allocator; 320b57cec5SDimitry Andric Parser &P; 330b57cec5SDimitry Andric 340b57cec5SDimitry Andric /// This flag is set when there are no more tokens we can fetch from lexer. 350b57cec5SDimitry Andric bool NoMoreInterestingTokens; 360b57cec5SDimitry Andric 370b57cec5SDimitry Andric /// Token buffer: tokens we have processed and lookahead. 380b57cec5SDimitry Andric SmallVector<Token, 16> Toks; 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric /// A position in \c Toks. 410b57cec5SDimitry Andric struct Position { 420b57cec5SDimitry Andric const char *BufferStart; 430b57cec5SDimitry Andric const char *BufferEnd; 440b57cec5SDimitry Andric const char *BufferPtr; 450b57cec5SDimitry Andric SourceLocation BufferStartLoc; 460b57cec5SDimitry Andric unsigned CurToken; 470b57cec5SDimitry Andric }; 480b57cec5SDimitry Andric 490b57cec5SDimitry Andric /// Current position in Toks. 500b57cec5SDimitry Andric Position Pos; 510b57cec5SDimitry Andric 520b57cec5SDimitry Andric bool isEnd() const { 530b57cec5SDimitry Andric return Pos.CurToken >= Toks.size(); 540b57cec5SDimitry Andric } 550b57cec5SDimitry Andric 560b57cec5SDimitry Andric /// Sets up the buffer pointers to point to current token. 570b57cec5SDimitry Andric void setupBuffer() { 580b57cec5SDimitry Andric assert(!isEnd()); 590b57cec5SDimitry Andric const Token &Tok = Toks[Pos.CurToken]; 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric Pos.BufferStart = Tok.getText().begin(); 620b57cec5SDimitry Andric Pos.BufferEnd = Tok.getText().end(); 630b57cec5SDimitry Andric Pos.BufferPtr = Pos.BufferStart; 640b57cec5SDimitry Andric Pos.BufferStartLoc = Tok.getLocation(); 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric SourceLocation getSourceLocation() const { 680b57cec5SDimitry Andric const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart; 690b57cec5SDimitry Andric return Pos.BufferStartLoc.getLocWithOffset(CharNo); 700b57cec5SDimitry Andric } 710b57cec5SDimitry Andric 720b57cec5SDimitry Andric char peek() const { 730b57cec5SDimitry Andric assert(!isEnd()); 740b57cec5SDimitry Andric assert(Pos.BufferPtr != Pos.BufferEnd); 750b57cec5SDimitry Andric return *Pos.BufferPtr; 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric void consumeChar() { 790b57cec5SDimitry Andric assert(!isEnd()); 800b57cec5SDimitry Andric assert(Pos.BufferPtr != Pos.BufferEnd); 810b57cec5SDimitry Andric Pos.BufferPtr++; 820b57cec5SDimitry Andric if (Pos.BufferPtr == Pos.BufferEnd) { 830b57cec5SDimitry Andric Pos.CurToken++; 840b57cec5SDimitry Andric if (isEnd() && !addToken()) 850b57cec5SDimitry Andric return; 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric assert(!isEnd()); 880b57cec5SDimitry Andric setupBuffer(); 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric } 910b57cec5SDimitry Andric 92*0fca6ea1SDimitry Andric /// Extract a template type 93*0fca6ea1SDimitry Andric bool lexTemplate(SmallString<32> &WordText) { 94*0fca6ea1SDimitry Andric unsigned BracketCount = 0; 95*0fca6ea1SDimitry Andric while (!isEnd()) { 96*0fca6ea1SDimitry Andric const char C = peek(); 97*0fca6ea1SDimitry Andric WordText.push_back(C); 98*0fca6ea1SDimitry Andric consumeChar(); 99*0fca6ea1SDimitry Andric switch (C) { 100*0fca6ea1SDimitry Andric case '<': { 101*0fca6ea1SDimitry Andric BracketCount++; 102*0fca6ea1SDimitry Andric break; 103*0fca6ea1SDimitry Andric } 104*0fca6ea1SDimitry Andric case '>': { 105*0fca6ea1SDimitry Andric BracketCount--; 106*0fca6ea1SDimitry Andric if (!BracketCount) 107*0fca6ea1SDimitry Andric return true; 108*0fca6ea1SDimitry Andric break; 109*0fca6ea1SDimitry Andric } 110*0fca6ea1SDimitry Andric default: 111*0fca6ea1SDimitry Andric break; 112*0fca6ea1SDimitry Andric } 113*0fca6ea1SDimitry Andric } 114*0fca6ea1SDimitry Andric return false; 115*0fca6ea1SDimitry Andric } 116*0fca6ea1SDimitry Andric 1170b57cec5SDimitry Andric /// Add a token. 1180b57cec5SDimitry Andric /// Returns true on success, false if there are no interesting tokens to 1190b57cec5SDimitry Andric /// fetch from lexer. 1200b57cec5SDimitry Andric bool addToken() { 1210b57cec5SDimitry Andric if (NoMoreInterestingTokens) 1220b57cec5SDimitry Andric return false; 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric if (P.Tok.is(tok::newline)) { 1250b57cec5SDimitry Andric // If we see a single newline token between text tokens, skip it. 1260b57cec5SDimitry Andric Token Newline = P.Tok; 1270b57cec5SDimitry Andric P.consumeToken(); 1280b57cec5SDimitry Andric if (P.Tok.isNot(tok::text)) { 1290b57cec5SDimitry Andric P.putBack(Newline); 1300b57cec5SDimitry Andric NoMoreInterestingTokens = true; 1310b57cec5SDimitry Andric return false; 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric if (P.Tok.isNot(tok::text)) { 1350b57cec5SDimitry Andric NoMoreInterestingTokens = true; 1360b57cec5SDimitry Andric return false; 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric Toks.push_back(P.Tok); 1400b57cec5SDimitry Andric P.consumeToken(); 1410b57cec5SDimitry Andric if (Toks.size() == 1) 1420b57cec5SDimitry Andric setupBuffer(); 1430b57cec5SDimitry Andric return true; 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric void consumeWhitespace() { 1470b57cec5SDimitry Andric while (!isEnd()) { 1480b57cec5SDimitry Andric if (isWhitespace(peek())) 1490b57cec5SDimitry Andric consumeChar(); 1500b57cec5SDimitry Andric else 1510b57cec5SDimitry Andric break; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric } 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric void formTokenWithChars(Token &Result, 1560b57cec5SDimitry Andric SourceLocation Loc, 1570b57cec5SDimitry Andric const char *TokBegin, 1580b57cec5SDimitry Andric unsigned TokLength, 1590b57cec5SDimitry Andric StringRef Text) { 1600b57cec5SDimitry Andric Result.setLocation(Loc); 1610b57cec5SDimitry Andric Result.setKind(tok::text); 1620b57cec5SDimitry Andric Result.setLength(TokLength); 1630b57cec5SDimitry Andric #ifndef NDEBUG 1640b57cec5SDimitry Andric Result.TextPtr = "<UNSET>"; 1650b57cec5SDimitry Andric Result.IntVal = 7; 1660b57cec5SDimitry Andric #endif 1670b57cec5SDimitry Andric Result.setText(Text); 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric public: 1710b57cec5SDimitry Andric TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P): 1720b57cec5SDimitry Andric Allocator(Allocator), P(P), NoMoreInterestingTokens(false) { 1730b57cec5SDimitry Andric Pos.CurToken = 0; 1740b57cec5SDimitry Andric addToken(); 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 177*0fca6ea1SDimitry Andric /// Extract a type argument 178*0fca6ea1SDimitry Andric bool lexType(Token &Tok) { 179*0fca6ea1SDimitry Andric if (isEnd()) 180*0fca6ea1SDimitry Andric return false; 181*0fca6ea1SDimitry Andric 182*0fca6ea1SDimitry Andric // Save current position in case we need to rollback because the type is 183*0fca6ea1SDimitry Andric // empty. 184*0fca6ea1SDimitry Andric Position SavedPos = Pos; 185*0fca6ea1SDimitry Andric 186*0fca6ea1SDimitry Andric // Consume any leading whitespace. 187*0fca6ea1SDimitry Andric consumeWhitespace(); 188*0fca6ea1SDimitry Andric SmallString<32> WordText; 189*0fca6ea1SDimitry Andric const char *WordBegin = Pos.BufferPtr; 190*0fca6ea1SDimitry Andric SourceLocation Loc = getSourceLocation(); 191*0fca6ea1SDimitry Andric 192*0fca6ea1SDimitry Andric while (!isEnd()) { 193*0fca6ea1SDimitry Andric const char C = peek(); 194*0fca6ea1SDimitry Andric // For non-whitespace characters we check if it's a template or otherwise 195*0fca6ea1SDimitry Andric // continue reading the text into a word. 196*0fca6ea1SDimitry Andric if (!isWhitespace(C)) { 197*0fca6ea1SDimitry Andric if (C == '<') { 198*0fca6ea1SDimitry Andric if (!lexTemplate(WordText)) 199*0fca6ea1SDimitry Andric return false; 200*0fca6ea1SDimitry Andric } else { 201*0fca6ea1SDimitry Andric WordText.push_back(C); 202*0fca6ea1SDimitry Andric consumeChar(); 203*0fca6ea1SDimitry Andric } 204*0fca6ea1SDimitry Andric } else { 205*0fca6ea1SDimitry Andric consumeChar(); 206*0fca6ea1SDimitry Andric break; 207*0fca6ea1SDimitry Andric } 208*0fca6ea1SDimitry Andric } 209*0fca6ea1SDimitry Andric 210*0fca6ea1SDimitry Andric const unsigned Length = WordText.size(); 211*0fca6ea1SDimitry Andric if (Length == 0) { 212*0fca6ea1SDimitry Andric Pos = SavedPos; 213*0fca6ea1SDimitry Andric return false; 214*0fca6ea1SDimitry Andric } 215*0fca6ea1SDimitry Andric 216*0fca6ea1SDimitry Andric char *TextPtr = Allocator.Allocate<char>(Length + 1); 217*0fca6ea1SDimitry Andric 218*0fca6ea1SDimitry Andric memcpy(TextPtr, WordText.c_str(), Length + 1); 219*0fca6ea1SDimitry Andric StringRef Text = StringRef(TextPtr, Length); 220*0fca6ea1SDimitry Andric 221*0fca6ea1SDimitry Andric formTokenWithChars(Tok, Loc, WordBegin, Length, Text); 222*0fca6ea1SDimitry Andric return true; 223*0fca6ea1SDimitry Andric } 224*0fca6ea1SDimitry Andric 225*0fca6ea1SDimitry Andric // Check if this line starts with @par or \par 226*0fca6ea1SDimitry Andric bool startsWithParCommand() { 227*0fca6ea1SDimitry Andric unsigned Offset = 1; 228*0fca6ea1SDimitry Andric 229*0fca6ea1SDimitry Andric // Skip all whitespace characters at the beginning. 230*0fca6ea1SDimitry Andric // This needs to backtrack because Pos has already advanced past the 231*0fca6ea1SDimitry Andric // actual \par or @par command by the time this function is called. 232*0fca6ea1SDimitry Andric while (isWhitespace(*(Pos.BufferPtr - Offset))) 233*0fca6ea1SDimitry Andric Offset++; 234*0fca6ea1SDimitry Andric 235*0fca6ea1SDimitry Andric // Once we've reached the whitespace, backtrack and check if the previous 236*0fca6ea1SDimitry Andric // four characters are \par or @par. 237*0fca6ea1SDimitry Andric llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4); 238*0fca6ea1SDimitry Andric return LineStart.starts_with("\\par") || LineStart.starts_with("@par"); 239*0fca6ea1SDimitry Andric } 240*0fca6ea1SDimitry Andric 241*0fca6ea1SDimitry Andric /// Extract a par command argument-header. 242*0fca6ea1SDimitry Andric bool lexParHeading(Token &Tok) { 243*0fca6ea1SDimitry Andric if (isEnd()) 244*0fca6ea1SDimitry Andric return false; 245*0fca6ea1SDimitry Andric 246*0fca6ea1SDimitry Andric Position SavedPos = Pos; 247*0fca6ea1SDimitry Andric 248*0fca6ea1SDimitry Andric consumeWhitespace(); 249*0fca6ea1SDimitry Andric SmallString<32> WordText; 250*0fca6ea1SDimitry Andric const char *WordBegin = Pos.BufferPtr; 251*0fca6ea1SDimitry Andric SourceLocation Loc = getSourceLocation(); 252*0fca6ea1SDimitry Andric 253*0fca6ea1SDimitry Andric if (!startsWithParCommand()) 254*0fca6ea1SDimitry Andric return false; 255*0fca6ea1SDimitry Andric 256*0fca6ea1SDimitry Andric // Read until the end of this token, which is effectively the end of the 257*0fca6ea1SDimitry Andric // line. This gets us the content of the par header, if there is one. 258*0fca6ea1SDimitry Andric while (!isEnd()) { 259*0fca6ea1SDimitry Andric WordText.push_back(peek()); 260*0fca6ea1SDimitry Andric if (Pos.BufferPtr + 1 == Pos.BufferEnd) { 261*0fca6ea1SDimitry Andric consumeChar(); 262*0fca6ea1SDimitry Andric break; 263*0fca6ea1SDimitry Andric } 264*0fca6ea1SDimitry Andric consumeChar(); 265*0fca6ea1SDimitry Andric } 266*0fca6ea1SDimitry Andric 267*0fca6ea1SDimitry Andric unsigned Length = WordText.size(); 268*0fca6ea1SDimitry Andric if (Length == 0) { 269*0fca6ea1SDimitry Andric Pos = SavedPos; 270*0fca6ea1SDimitry Andric return false; 271*0fca6ea1SDimitry Andric } 272*0fca6ea1SDimitry Andric 273*0fca6ea1SDimitry Andric char *TextPtr = Allocator.Allocate<char>(Length + 1); 274*0fca6ea1SDimitry Andric 275*0fca6ea1SDimitry Andric memcpy(TextPtr, WordText.c_str(), Length + 1); 276*0fca6ea1SDimitry Andric StringRef Text = StringRef(TextPtr, Length); 277*0fca6ea1SDimitry Andric 278*0fca6ea1SDimitry Andric formTokenWithChars(Tok, Loc, WordBegin, Length, Text); 279*0fca6ea1SDimitry Andric return true; 280*0fca6ea1SDimitry Andric } 281*0fca6ea1SDimitry Andric 2820b57cec5SDimitry Andric /// Extract a word -- sequence of non-whitespace characters. 2830b57cec5SDimitry Andric bool lexWord(Token &Tok) { 2840b57cec5SDimitry Andric if (isEnd()) 2850b57cec5SDimitry Andric return false; 2860b57cec5SDimitry Andric 2870b57cec5SDimitry Andric Position SavedPos = Pos; 2880b57cec5SDimitry Andric 2890b57cec5SDimitry Andric consumeWhitespace(); 2900b57cec5SDimitry Andric SmallString<32> WordText; 2910b57cec5SDimitry Andric const char *WordBegin = Pos.BufferPtr; 2920b57cec5SDimitry Andric SourceLocation Loc = getSourceLocation(); 2930b57cec5SDimitry Andric while (!isEnd()) { 2940b57cec5SDimitry Andric const char C = peek(); 2950b57cec5SDimitry Andric if (!isWhitespace(C)) { 2960b57cec5SDimitry Andric WordText.push_back(C); 2970b57cec5SDimitry Andric consumeChar(); 2980b57cec5SDimitry Andric } else 2990b57cec5SDimitry Andric break; 3000b57cec5SDimitry Andric } 3010b57cec5SDimitry Andric const unsigned Length = WordText.size(); 3020b57cec5SDimitry Andric if (Length == 0) { 3030b57cec5SDimitry Andric Pos = SavedPos; 3040b57cec5SDimitry Andric return false; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric char *TextPtr = Allocator.Allocate<char>(Length + 1); 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric memcpy(TextPtr, WordText.c_str(), Length + 1); 3100b57cec5SDimitry Andric StringRef Text = StringRef(TextPtr, Length); 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric formTokenWithChars(Tok, Loc, WordBegin, Length, Text); 3130b57cec5SDimitry Andric return true; 3140b57cec5SDimitry Andric } 3150b57cec5SDimitry Andric 3160b57cec5SDimitry Andric bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) { 3170b57cec5SDimitry Andric if (isEnd()) 3180b57cec5SDimitry Andric return false; 3190b57cec5SDimitry Andric 3200b57cec5SDimitry Andric Position SavedPos = Pos; 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric consumeWhitespace(); 3230b57cec5SDimitry Andric SmallString<32> WordText; 3240b57cec5SDimitry Andric const char *WordBegin = Pos.BufferPtr; 3250b57cec5SDimitry Andric SourceLocation Loc = getSourceLocation(); 3260b57cec5SDimitry Andric bool Error = false; 3270b57cec5SDimitry Andric if (!isEnd()) { 3280b57cec5SDimitry Andric const char C = peek(); 3290b57cec5SDimitry Andric if (C == OpenDelim) { 3300b57cec5SDimitry Andric WordText.push_back(C); 3310b57cec5SDimitry Andric consumeChar(); 3320b57cec5SDimitry Andric } else 3330b57cec5SDimitry Andric Error = true; 3340b57cec5SDimitry Andric } 3350b57cec5SDimitry Andric char C = '\0'; 3360b57cec5SDimitry Andric while (!Error && !isEnd()) { 3370b57cec5SDimitry Andric C = peek(); 3380b57cec5SDimitry Andric WordText.push_back(C); 3390b57cec5SDimitry Andric consumeChar(); 3400b57cec5SDimitry Andric if (C == CloseDelim) 3410b57cec5SDimitry Andric break; 3420b57cec5SDimitry Andric } 3430b57cec5SDimitry Andric if (!Error && C != CloseDelim) 3440b57cec5SDimitry Andric Error = true; 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric if (Error) { 3470b57cec5SDimitry Andric Pos = SavedPos; 3480b57cec5SDimitry Andric return false; 3490b57cec5SDimitry Andric } 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric const unsigned Length = WordText.size(); 3520b57cec5SDimitry Andric char *TextPtr = Allocator.Allocate<char>(Length + 1); 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric memcpy(TextPtr, WordText.c_str(), Length + 1); 3550b57cec5SDimitry Andric StringRef Text = StringRef(TextPtr, Length); 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric formTokenWithChars(Tok, Loc, WordBegin, 3580b57cec5SDimitry Andric Pos.BufferPtr - WordBegin, Text); 3590b57cec5SDimitry Andric return true; 3600b57cec5SDimitry Andric } 3610b57cec5SDimitry Andric 3620b57cec5SDimitry Andric /// Put back tokens that we didn't consume. 3630b57cec5SDimitry Andric void putBackLeftoverTokens() { 3640b57cec5SDimitry Andric if (isEnd()) 3650b57cec5SDimitry Andric return; 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric bool HavePartialTok = false; 3680b57cec5SDimitry Andric Token PartialTok; 3690b57cec5SDimitry Andric if (Pos.BufferPtr != Pos.BufferStart) { 3700b57cec5SDimitry Andric formTokenWithChars(PartialTok, getSourceLocation(), 3710b57cec5SDimitry Andric Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, 3720b57cec5SDimitry Andric StringRef(Pos.BufferPtr, 3730b57cec5SDimitry Andric Pos.BufferEnd - Pos.BufferPtr)); 3740b57cec5SDimitry Andric HavePartialTok = true; 3750b57cec5SDimitry Andric Pos.CurToken++; 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric 378bdd1243dSDimitry Andric P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end())); 3790b57cec5SDimitry Andric Pos.CurToken = Toks.size(); 3800b57cec5SDimitry Andric 3810b57cec5SDimitry Andric if (HavePartialTok) 3820b57cec5SDimitry Andric P.putBack(PartialTok); 3830b57cec5SDimitry Andric } 3840b57cec5SDimitry Andric }; 3850b57cec5SDimitry Andric 3860b57cec5SDimitry Andric Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, 3870b57cec5SDimitry Andric const SourceManager &SourceMgr, DiagnosticsEngine &Diags, 3880b57cec5SDimitry Andric const CommandTraits &Traits): 3890b57cec5SDimitry Andric L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags), 3900b57cec5SDimitry Andric Traits(Traits) { 3910b57cec5SDimitry Andric consumeToken(); 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric void Parser::parseParamCommandArgs(ParamCommandComment *PC, 3950b57cec5SDimitry Andric TextTokenRetokenizer &Retokenizer) { 3960b57cec5SDimitry Andric Token Arg; 3970b57cec5SDimitry Andric // Check if argument looks like direction specification: [dir] 3980b57cec5SDimitry Andric // e.g., [in], [out], [in,out] 3990b57cec5SDimitry Andric if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) 4000b57cec5SDimitry Andric S.actOnParamCommandDirectionArg(PC, 4010b57cec5SDimitry Andric Arg.getLocation(), 4020b57cec5SDimitry Andric Arg.getEndLocation(), 4030b57cec5SDimitry Andric Arg.getText()); 4040b57cec5SDimitry Andric 4050b57cec5SDimitry Andric if (Retokenizer.lexWord(Arg)) 4060b57cec5SDimitry Andric S.actOnParamCommandParamNameArg(PC, 4070b57cec5SDimitry Andric Arg.getLocation(), 4080b57cec5SDimitry Andric Arg.getEndLocation(), 4090b57cec5SDimitry Andric Arg.getText()); 4100b57cec5SDimitry Andric } 4110b57cec5SDimitry Andric 4120b57cec5SDimitry Andric void Parser::parseTParamCommandArgs(TParamCommandComment *TPC, 4130b57cec5SDimitry Andric TextTokenRetokenizer &Retokenizer) { 4140b57cec5SDimitry Andric Token Arg; 4150b57cec5SDimitry Andric if (Retokenizer.lexWord(Arg)) 4160b57cec5SDimitry Andric S.actOnTParamCommandParamNameArg(TPC, 4170b57cec5SDimitry Andric Arg.getLocation(), 4180b57cec5SDimitry Andric Arg.getEndLocation(), 4190b57cec5SDimitry Andric Arg.getText()); 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 42281ad6265SDimitry Andric ArrayRef<Comment::Argument> 42381ad6265SDimitry Andric Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) { 42481ad6265SDimitry Andric auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) 42581ad6265SDimitry Andric Comment::Argument[NumArgs]; 4260b57cec5SDimitry Andric unsigned ParsedArgs = 0; 4270b57cec5SDimitry Andric Token Arg; 4280b57cec5SDimitry Andric while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { 42981ad6265SDimitry Andric Args[ParsedArgs] = Comment::Argument{ 43081ad6265SDimitry Andric SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; 4310b57cec5SDimitry Andric ParsedArgs++; 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric 434bdd1243dSDimitry Andric return llvm::ArrayRef(Args, ParsedArgs); 4350b57cec5SDimitry Andric } 4360b57cec5SDimitry Andric 437*0fca6ea1SDimitry Andric ArrayRef<Comment::Argument> 438*0fca6ea1SDimitry Andric Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, 439*0fca6ea1SDimitry Andric unsigned NumArgs) { 440*0fca6ea1SDimitry Andric auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) 441*0fca6ea1SDimitry Andric Comment::Argument[NumArgs]; 442*0fca6ea1SDimitry Andric unsigned ParsedArgs = 0; 443*0fca6ea1SDimitry Andric Token Arg; 444*0fca6ea1SDimitry Andric 445*0fca6ea1SDimitry Andric while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) { 446*0fca6ea1SDimitry Andric Args[ParsedArgs] = Comment::Argument{ 447*0fca6ea1SDimitry Andric SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; 448*0fca6ea1SDimitry Andric ParsedArgs++; 449*0fca6ea1SDimitry Andric } 450*0fca6ea1SDimitry Andric 451*0fca6ea1SDimitry Andric return llvm::ArrayRef(Args, ParsedArgs); 452*0fca6ea1SDimitry Andric } 453*0fca6ea1SDimitry Andric 454*0fca6ea1SDimitry Andric ArrayRef<Comment::Argument> 455*0fca6ea1SDimitry Andric Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer, 456*0fca6ea1SDimitry Andric unsigned NumArgs) { 457*0fca6ea1SDimitry Andric assert(NumArgs > 0); 458*0fca6ea1SDimitry Andric auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) 459*0fca6ea1SDimitry Andric Comment::Argument[NumArgs]; 460*0fca6ea1SDimitry Andric unsigned ParsedArgs = 0; 461*0fca6ea1SDimitry Andric Token Arg; 462*0fca6ea1SDimitry Andric 463*0fca6ea1SDimitry Andric while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) { 464*0fca6ea1SDimitry Andric Args[ParsedArgs] = Comment::Argument{ 465*0fca6ea1SDimitry Andric SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; 466*0fca6ea1SDimitry Andric ParsedArgs++; 467*0fca6ea1SDimitry Andric } 468*0fca6ea1SDimitry Andric 469*0fca6ea1SDimitry Andric return llvm::ArrayRef(Args, ParsedArgs); 470*0fca6ea1SDimitry Andric } 471*0fca6ea1SDimitry Andric 4720b57cec5SDimitry Andric BlockCommandComment *Parser::parseBlockCommand() { 4730b57cec5SDimitry Andric assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric ParamCommandComment *PC = nullptr; 4760b57cec5SDimitry Andric TParamCommandComment *TPC = nullptr; 4770b57cec5SDimitry Andric BlockCommandComment *BC = nullptr; 4780b57cec5SDimitry Andric const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 4790b57cec5SDimitry Andric CommandMarkerKind CommandMarker = 4800b57cec5SDimitry Andric Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At; 4810b57cec5SDimitry Andric if (Info->IsParamCommand) { 4820b57cec5SDimitry Andric PC = S.actOnParamCommandStart(Tok.getLocation(), 4830b57cec5SDimitry Andric Tok.getEndLocation(), 4840b57cec5SDimitry Andric Tok.getCommandID(), 4850b57cec5SDimitry Andric CommandMarker); 4860b57cec5SDimitry Andric } else if (Info->IsTParamCommand) { 4870b57cec5SDimitry Andric TPC = S.actOnTParamCommandStart(Tok.getLocation(), 4880b57cec5SDimitry Andric Tok.getEndLocation(), 4890b57cec5SDimitry Andric Tok.getCommandID(), 4900b57cec5SDimitry Andric CommandMarker); 4910b57cec5SDimitry Andric } else { 4920b57cec5SDimitry Andric BC = S.actOnBlockCommandStart(Tok.getLocation(), 4930b57cec5SDimitry Andric Tok.getEndLocation(), 4940b57cec5SDimitry Andric Tok.getCommandID(), 4950b57cec5SDimitry Andric CommandMarker); 4960b57cec5SDimitry Andric } 4970b57cec5SDimitry Andric consumeToken(); 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric if (isTokBlockCommand()) { 5000b57cec5SDimitry Andric // Block command ahead. We can't nest block commands, so pretend that this 5010b57cec5SDimitry Andric // command has an empty argument. 502bdd1243dSDimitry Andric ParagraphComment *Paragraph = S.actOnParagraphComment(std::nullopt); 5030b57cec5SDimitry Andric if (PC) { 5040b57cec5SDimitry Andric S.actOnParamCommandFinish(PC, Paragraph); 5050b57cec5SDimitry Andric return PC; 5060b57cec5SDimitry Andric } else if (TPC) { 5070b57cec5SDimitry Andric S.actOnTParamCommandFinish(TPC, Paragraph); 5080b57cec5SDimitry Andric return TPC; 5090b57cec5SDimitry Andric } else { 5100b57cec5SDimitry Andric S.actOnBlockCommandFinish(BC, Paragraph); 5110b57cec5SDimitry Andric return BC; 5120b57cec5SDimitry Andric } 5130b57cec5SDimitry Andric } 5140b57cec5SDimitry Andric 5150b57cec5SDimitry Andric if (PC || TPC || Info->NumArgs > 0) { 5160b57cec5SDimitry Andric // In order to parse command arguments we need to retokenize a few 5170b57cec5SDimitry Andric // following text tokens. 5180b57cec5SDimitry Andric TextTokenRetokenizer Retokenizer(Allocator, *this); 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric if (PC) 5210b57cec5SDimitry Andric parseParamCommandArgs(PC, Retokenizer); 5220b57cec5SDimitry Andric else if (TPC) 5230b57cec5SDimitry Andric parseTParamCommandArgs(TPC, Retokenizer); 524*0fca6ea1SDimitry Andric else if (Info->IsThrowsCommand) 525*0fca6ea1SDimitry Andric S.actOnBlockCommandArgs( 526*0fca6ea1SDimitry Andric BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs)); 527*0fca6ea1SDimitry Andric else if (Info->IsParCommand) 528*0fca6ea1SDimitry Andric S.actOnBlockCommandArgs(BC, 529*0fca6ea1SDimitry Andric parseParCommandArgs(Retokenizer, Info->NumArgs)); 5300b57cec5SDimitry Andric else 53181ad6265SDimitry Andric S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs)); 5320b57cec5SDimitry Andric 5330b57cec5SDimitry Andric Retokenizer.putBackLeftoverTokens(); 5340b57cec5SDimitry Andric } 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric // If there's a block command ahead, we will attach an empty paragraph to 5370b57cec5SDimitry Andric // this command. 5380b57cec5SDimitry Andric bool EmptyParagraph = false; 5390b57cec5SDimitry Andric if (isTokBlockCommand()) 5400b57cec5SDimitry Andric EmptyParagraph = true; 5410b57cec5SDimitry Andric else if (Tok.is(tok::newline)) { 5420b57cec5SDimitry Andric Token PrevTok = Tok; 5430b57cec5SDimitry Andric consumeToken(); 5440b57cec5SDimitry Andric EmptyParagraph = isTokBlockCommand(); 5450b57cec5SDimitry Andric putBack(PrevTok); 5460b57cec5SDimitry Andric } 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric ParagraphComment *Paragraph; 5490b57cec5SDimitry Andric if (EmptyParagraph) 550bdd1243dSDimitry Andric Paragraph = S.actOnParagraphComment(std::nullopt); 5510b57cec5SDimitry Andric else { 5520b57cec5SDimitry Andric BlockContentComment *Block = parseParagraphOrBlockCommand(); 5530b57cec5SDimitry Andric // Since we have checked for a block command, we should have parsed a 5540b57cec5SDimitry Andric // paragraph. 5550b57cec5SDimitry Andric Paragraph = cast<ParagraphComment>(Block); 5560b57cec5SDimitry Andric } 5570b57cec5SDimitry Andric 5580b57cec5SDimitry Andric if (PC) { 5590b57cec5SDimitry Andric S.actOnParamCommandFinish(PC, Paragraph); 5600b57cec5SDimitry Andric return PC; 5610b57cec5SDimitry Andric } else if (TPC) { 5620b57cec5SDimitry Andric S.actOnTParamCommandFinish(TPC, Paragraph); 5630b57cec5SDimitry Andric return TPC; 5640b57cec5SDimitry Andric } else { 5650b57cec5SDimitry Andric S.actOnBlockCommandFinish(BC, Paragraph); 5660b57cec5SDimitry Andric return BC; 5670b57cec5SDimitry Andric } 5680b57cec5SDimitry Andric } 5690b57cec5SDimitry Andric 5700b57cec5SDimitry Andric InlineCommandComment *Parser::parseInlineCommand() { 5710b57cec5SDimitry Andric assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); 57281ad6265SDimitry Andric const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric const Token CommandTok = Tok; 5750b57cec5SDimitry Andric consumeToken(); 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric TextTokenRetokenizer Retokenizer(Allocator, *this); 57881ad6265SDimitry Andric ArrayRef<Comment::Argument> Args = 57981ad6265SDimitry Andric parseCommandArgs(Retokenizer, Info->NumArgs); 5800b57cec5SDimitry Andric 58181ad6265SDimitry Andric InlineCommandComment *IC = S.actOnInlineCommand( 58281ad6265SDimitry Andric CommandTok.getLocation(), CommandTok.getEndLocation(), 58381ad6265SDimitry Andric CommandTok.getCommandID(), Args); 5840b57cec5SDimitry Andric 58581ad6265SDimitry Andric if (Args.size() < Info->NumArgs) { 586a7dea167SDimitry Andric Diag(CommandTok.getEndLocation().getLocWithOffset(1), 58781ad6265SDimitry Andric diag::warn_doc_inline_command_not_enough_arguments) 58881ad6265SDimitry Andric << CommandTok.is(tok::at_command) << Info->Name << Args.size() 58981ad6265SDimitry Andric << Info->NumArgs 590a7dea167SDimitry Andric << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation()); 5910b57cec5SDimitry Andric } 5920b57cec5SDimitry Andric 5930b57cec5SDimitry Andric Retokenizer.putBackLeftoverTokens(); 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric return IC; 5960b57cec5SDimitry Andric } 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric HTMLStartTagComment *Parser::parseHTMLStartTag() { 5990b57cec5SDimitry Andric assert(Tok.is(tok::html_start_tag)); 6000b57cec5SDimitry Andric HTMLStartTagComment *HST = 6010b57cec5SDimitry Andric S.actOnHTMLStartTagStart(Tok.getLocation(), 6020b57cec5SDimitry Andric Tok.getHTMLTagStartName()); 6030b57cec5SDimitry Andric consumeToken(); 6040b57cec5SDimitry Andric 6050b57cec5SDimitry Andric SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; 6060b57cec5SDimitry Andric while (true) { 6070b57cec5SDimitry Andric switch (Tok.getKind()) { 6080b57cec5SDimitry Andric case tok::html_ident: { 6090b57cec5SDimitry Andric Token Ident = Tok; 6100b57cec5SDimitry Andric consumeToken(); 6110b57cec5SDimitry Andric if (Tok.isNot(tok::html_equals)) { 6120b57cec5SDimitry Andric Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 6130b57cec5SDimitry Andric Ident.getHTMLIdent())); 6140b57cec5SDimitry Andric continue; 6150b57cec5SDimitry Andric } 6160b57cec5SDimitry Andric Token Equals = Tok; 6170b57cec5SDimitry Andric consumeToken(); 6180b57cec5SDimitry Andric if (Tok.isNot(tok::html_quoted_string)) { 6190b57cec5SDimitry Andric Diag(Tok.getLocation(), 6200b57cec5SDimitry Andric diag::warn_doc_html_start_tag_expected_quoted_string) 6210b57cec5SDimitry Andric << SourceRange(Equals.getLocation()); 6220b57cec5SDimitry Andric Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), 6230b57cec5SDimitry Andric Ident.getHTMLIdent())); 6240b57cec5SDimitry Andric while (Tok.is(tok::html_equals) || 6250b57cec5SDimitry Andric Tok.is(tok::html_quoted_string)) 6260b57cec5SDimitry Andric consumeToken(); 6270b57cec5SDimitry Andric continue; 6280b57cec5SDimitry Andric } 6290b57cec5SDimitry Andric Attrs.push_back(HTMLStartTagComment::Attribute( 6300b57cec5SDimitry Andric Ident.getLocation(), 6310b57cec5SDimitry Andric Ident.getHTMLIdent(), 6320b57cec5SDimitry Andric Equals.getLocation(), 6330b57cec5SDimitry Andric SourceRange(Tok.getLocation(), 6340b57cec5SDimitry Andric Tok.getEndLocation()), 6350b57cec5SDimitry Andric Tok.getHTMLQuotedString())); 6360b57cec5SDimitry Andric consumeToken(); 6370b57cec5SDimitry Andric continue; 6380b57cec5SDimitry Andric } 6390b57cec5SDimitry Andric 6400b57cec5SDimitry Andric case tok::html_greater: 641bdd1243dSDimitry Andric S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 6420b57cec5SDimitry Andric Tok.getLocation(), 6430b57cec5SDimitry Andric /* IsSelfClosing = */ false); 6440b57cec5SDimitry Andric consumeToken(); 6450b57cec5SDimitry Andric return HST; 6460b57cec5SDimitry Andric 6470b57cec5SDimitry Andric case tok::html_slash_greater: 648bdd1243dSDimitry Andric S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 6490b57cec5SDimitry Andric Tok.getLocation(), 6500b57cec5SDimitry Andric /* IsSelfClosing = */ true); 6510b57cec5SDimitry Andric consumeToken(); 6520b57cec5SDimitry Andric return HST; 6530b57cec5SDimitry Andric 6540b57cec5SDimitry Andric case tok::html_equals: 6550b57cec5SDimitry Andric case tok::html_quoted_string: 6560b57cec5SDimitry Andric Diag(Tok.getLocation(), 6570b57cec5SDimitry Andric diag::warn_doc_html_start_tag_expected_ident_or_greater); 6580b57cec5SDimitry Andric while (Tok.is(tok::html_equals) || 6590b57cec5SDimitry Andric Tok.is(tok::html_quoted_string)) 6600b57cec5SDimitry Andric consumeToken(); 6610b57cec5SDimitry Andric if (Tok.is(tok::html_ident) || 6620b57cec5SDimitry Andric Tok.is(tok::html_greater) || 6630b57cec5SDimitry Andric Tok.is(tok::html_slash_greater)) 6640b57cec5SDimitry Andric continue; 6650b57cec5SDimitry Andric 666bdd1243dSDimitry Andric S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 6670b57cec5SDimitry Andric SourceLocation(), 6680b57cec5SDimitry Andric /* IsSelfClosing = */ false); 6690b57cec5SDimitry Andric return HST; 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric default: 6720b57cec5SDimitry Andric // Not a token from an HTML start tag. Thus HTML tag prematurely ended. 673bdd1243dSDimitry Andric S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), 6740b57cec5SDimitry Andric SourceLocation(), 6750b57cec5SDimitry Andric /* IsSelfClosing = */ false); 6760b57cec5SDimitry Andric bool StartLineInvalid; 6770b57cec5SDimitry Andric const unsigned StartLine = SourceMgr.getPresumedLineNumber( 6780b57cec5SDimitry Andric HST->getLocation(), 6790b57cec5SDimitry Andric &StartLineInvalid); 6800b57cec5SDimitry Andric bool EndLineInvalid; 6810b57cec5SDimitry Andric const unsigned EndLine = SourceMgr.getPresumedLineNumber( 6820b57cec5SDimitry Andric Tok.getLocation(), 6830b57cec5SDimitry Andric &EndLineInvalid); 6840b57cec5SDimitry Andric if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) 6850b57cec5SDimitry Andric Diag(Tok.getLocation(), 6860b57cec5SDimitry Andric diag::warn_doc_html_start_tag_expected_ident_or_greater) 6870b57cec5SDimitry Andric << HST->getSourceRange(); 6880b57cec5SDimitry Andric else { 6890b57cec5SDimitry Andric Diag(Tok.getLocation(), 6900b57cec5SDimitry Andric diag::warn_doc_html_start_tag_expected_ident_or_greater); 6910b57cec5SDimitry Andric Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) 6920b57cec5SDimitry Andric << HST->getSourceRange(); 6930b57cec5SDimitry Andric } 6940b57cec5SDimitry Andric return HST; 6950b57cec5SDimitry Andric } 6960b57cec5SDimitry Andric } 6970b57cec5SDimitry Andric } 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric HTMLEndTagComment *Parser::parseHTMLEndTag() { 7000b57cec5SDimitry Andric assert(Tok.is(tok::html_end_tag)); 7010b57cec5SDimitry Andric Token TokEndTag = Tok; 7020b57cec5SDimitry Andric consumeToken(); 7030b57cec5SDimitry Andric SourceLocation Loc; 7040b57cec5SDimitry Andric if (Tok.is(tok::html_greater)) { 7050b57cec5SDimitry Andric Loc = Tok.getLocation(); 7060b57cec5SDimitry Andric consumeToken(); 7070b57cec5SDimitry Andric } 7080b57cec5SDimitry Andric 7090b57cec5SDimitry Andric return S.actOnHTMLEndTag(TokEndTag.getLocation(), 7100b57cec5SDimitry Andric Loc, 7110b57cec5SDimitry Andric TokEndTag.getHTMLTagEndName()); 7120b57cec5SDimitry Andric } 7130b57cec5SDimitry Andric 7140b57cec5SDimitry Andric BlockContentComment *Parser::parseParagraphOrBlockCommand() { 7150b57cec5SDimitry Andric SmallVector<InlineContentComment *, 8> Content; 7160b57cec5SDimitry Andric 7170b57cec5SDimitry Andric while (true) { 7180b57cec5SDimitry Andric switch (Tok.getKind()) { 7190b57cec5SDimitry Andric case tok::verbatim_block_begin: 7200b57cec5SDimitry Andric case tok::verbatim_line_name: 7210b57cec5SDimitry Andric case tok::eof: 7220b57cec5SDimitry Andric break; // Block content or EOF ahead, finish this parapgaph. 7230b57cec5SDimitry Andric 7240b57cec5SDimitry Andric case tok::unknown_command: 7250b57cec5SDimitry Andric Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 7260b57cec5SDimitry Andric Tok.getEndLocation(), 7270b57cec5SDimitry Andric Tok.getUnknownCommandName())); 7280b57cec5SDimitry Andric consumeToken(); 7290b57cec5SDimitry Andric continue; 7300b57cec5SDimitry Andric 7310b57cec5SDimitry Andric case tok::backslash_command: 7320b57cec5SDimitry Andric case tok::at_command: { 7330b57cec5SDimitry Andric const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); 7340b57cec5SDimitry Andric if (Info->IsBlockCommand) { 7350b57cec5SDimitry Andric if (Content.size() == 0) 7360b57cec5SDimitry Andric return parseBlockCommand(); 7370b57cec5SDimitry Andric break; // Block command ahead, finish this parapgaph. 7380b57cec5SDimitry Andric } 7390b57cec5SDimitry Andric if (Info->IsVerbatimBlockEndCommand) { 7400b57cec5SDimitry Andric Diag(Tok.getLocation(), 7410b57cec5SDimitry Andric diag::warn_verbatim_block_end_without_start) 7420b57cec5SDimitry Andric << Tok.is(tok::at_command) 7430b57cec5SDimitry Andric << Info->Name 7440b57cec5SDimitry Andric << SourceRange(Tok.getLocation(), Tok.getEndLocation()); 7450b57cec5SDimitry Andric consumeToken(); 7460b57cec5SDimitry Andric continue; 7470b57cec5SDimitry Andric } 7480b57cec5SDimitry Andric if (Info->IsUnknownCommand) { 7490b57cec5SDimitry Andric Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), 7500b57cec5SDimitry Andric Tok.getEndLocation(), 7510b57cec5SDimitry Andric Info->getID())); 7520b57cec5SDimitry Andric consumeToken(); 7530b57cec5SDimitry Andric continue; 7540b57cec5SDimitry Andric } 7550b57cec5SDimitry Andric assert(Info->IsInlineCommand); 7560b57cec5SDimitry Andric Content.push_back(parseInlineCommand()); 7570b57cec5SDimitry Andric continue; 7580b57cec5SDimitry Andric } 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric case tok::newline: { 7610b57cec5SDimitry Andric consumeToken(); 7620b57cec5SDimitry Andric if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 7630b57cec5SDimitry Andric consumeToken(); 7640b57cec5SDimitry Andric break; // Two newlines -- end of paragraph. 7650b57cec5SDimitry Andric } 7660b57cec5SDimitry Andric // Also allow [tok::newline, tok::text, tok::newline] if the middle 7670b57cec5SDimitry Andric // tok::text is just whitespace. 7680b57cec5SDimitry Andric if (Tok.is(tok::text) && isWhitespace(Tok.getText())) { 7690b57cec5SDimitry Andric Token WhitespaceTok = Tok; 7700b57cec5SDimitry Andric consumeToken(); 7710b57cec5SDimitry Andric if (Tok.is(tok::newline) || Tok.is(tok::eof)) { 7720b57cec5SDimitry Andric consumeToken(); 7730b57cec5SDimitry Andric break; 7740b57cec5SDimitry Andric } 7750b57cec5SDimitry Andric // We have [tok::newline, tok::text, non-newline]. Put back tok::text. 7760b57cec5SDimitry Andric putBack(WhitespaceTok); 7770b57cec5SDimitry Andric } 7780b57cec5SDimitry Andric if (Content.size() > 0) 7790b57cec5SDimitry Andric Content.back()->addTrailingNewline(); 7800b57cec5SDimitry Andric continue; 7810b57cec5SDimitry Andric } 7820b57cec5SDimitry Andric 7830b57cec5SDimitry Andric // Don't deal with HTML tag soup now. 7840b57cec5SDimitry Andric case tok::html_start_tag: 7850b57cec5SDimitry Andric Content.push_back(parseHTMLStartTag()); 7860b57cec5SDimitry Andric continue; 7870b57cec5SDimitry Andric 7880b57cec5SDimitry Andric case tok::html_end_tag: 7890b57cec5SDimitry Andric Content.push_back(parseHTMLEndTag()); 7900b57cec5SDimitry Andric continue; 7910b57cec5SDimitry Andric 7920b57cec5SDimitry Andric case tok::text: 7930b57cec5SDimitry Andric Content.push_back(S.actOnText(Tok.getLocation(), 7940b57cec5SDimitry Andric Tok.getEndLocation(), 7950b57cec5SDimitry Andric Tok.getText())); 7960b57cec5SDimitry Andric consumeToken(); 7970b57cec5SDimitry Andric continue; 7980b57cec5SDimitry Andric 7990b57cec5SDimitry Andric case tok::verbatim_block_line: 8000b57cec5SDimitry Andric case tok::verbatim_block_end: 8010b57cec5SDimitry Andric case tok::verbatim_line_text: 8020b57cec5SDimitry Andric case tok::html_ident: 8030b57cec5SDimitry Andric case tok::html_equals: 8040b57cec5SDimitry Andric case tok::html_quoted_string: 8050b57cec5SDimitry Andric case tok::html_greater: 8060b57cec5SDimitry Andric case tok::html_slash_greater: 8070b57cec5SDimitry Andric llvm_unreachable("should not see this token"); 8080b57cec5SDimitry Andric } 8090b57cec5SDimitry Andric break; 8100b57cec5SDimitry Andric } 8110b57cec5SDimitry Andric 812bdd1243dSDimitry Andric return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content))); 8130b57cec5SDimitry Andric } 8140b57cec5SDimitry Andric 8150b57cec5SDimitry Andric VerbatimBlockComment *Parser::parseVerbatimBlock() { 8160b57cec5SDimitry Andric assert(Tok.is(tok::verbatim_block_begin)); 8170b57cec5SDimitry Andric 8180b57cec5SDimitry Andric VerbatimBlockComment *VB = 8190b57cec5SDimitry Andric S.actOnVerbatimBlockStart(Tok.getLocation(), 8200b57cec5SDimitry Andric Tok.getVerbatimBlockID()); 8210b57cec5SDimitry Andric consumeToken(); 8220b57cec5SDimitry Andric 8230b57cec5SDimitry Andric // Don't create an empty line if verbatim opening command is followed 8240b57cec5SDimitry Andric // by a newline. 8250b57cec5SDimitry Andric if (Tok.is(tok::newline)) 8260b57cec5SDimitry Andric consumeToken(); 8270b57cec5SDimitry Andric 8280b57cec5SDimitry Andric SmallVector<VerbatimBlockLineComment *, 8> Lines; 8290b57cec5SDimitry Andric while (Tok.is(tok::verbatim_block_line) || 8300b57cec5SDimitry Andric Tok.is(tok::newline)) { 8310b57cec5SDimitry Andric VerbatimBlockLineComment *Line; 8320b57cec5SDimitry Andric if (Tok.is(tok::verbatim_block_line)) { 8330b57cec5SDimitry Andric Line = S.actOnVerbatimBlockLine(Tok.getLocation(), 8340b57cec5SDimitry Andric Tok.getVerbatimBlockText()); 8350b57cec5SDimitry Andric consumeToken(); 8360b57cec5SDimitry Andric if (Tok.is(tok::newline)) { 8370b57cec5SDimitry Andric consumeToken(); 8380b57cec5SDimitry Andric } 8390b57cec5SDimitry Andric } else { 8400b57cec5SDimitry Andric // Empty line, just a tok::newline. 8410b57cec5SDimitry Andric Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); 8420b57cec5SDimitry Andric consumeToken(); 8430b57cec5SDimitry Andric } 8440b57cec5SDimitry Andric Lines.push_back(Line); 8450b57cec5SDimitry Andric } 8460b57cec5SDimitry Andric 8470b57cec5SDimitry Andric if (Tok.is(tok::verbatim_block_end)) { 8480b57cec5SDimitry Andric const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID()); 849bdd1243dSDimitry Andric S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name, 850bdd1243dSDimitry Andric S.copyArray(llvm::ArrayRef(Lines))); 8510b57cec5SDimitry Andric consumeToken(); 8520b57cec5SDimitry Andric } else { 8530b57cec5SDimitry Andric // Unterminated \\verbatim block 8540b57cec5SDimitry Andric S.actOnVerbatimBlockFinish(VB, SourceLocation(), "", 855bdd1243dSDimitry Andric S.copyArray(llvm::ArrayRef(Lines))); 8560b57cec5SDimitry Andric } 8570b57cec5SDimitry Andric 8580b57cec5SDimitry Andric return VB; 8590b57cec5SDimitry Andric } 8600b57cec5SDimitry Andric 8610b57cec5SDimitry Andric VerbatimLineComment *Parser::parseVerbatimLine() { 8620b57cec5SDimitry Andric assert(Tok.is(tok::verbatim_line_name)); 8630b57cec5SDimitry Andric 8640b57cec5SDimitry Andric Token NameTok = Tok; 8650b57cec5SDimitry Andric consumeToken(); 8660b57cec5SDimitry Andric 8670b57cec5SDimitry Andric SourceLocation TextBegin; 8680b57cec5SDimitry Andric StringRef Text; 8690b57cec5SDimitry Andric // Next token might not be a tok::verbatim_line_text if verbatim line 8700b57cec5SDimitry Andric // starting command comes just before a newline or comment end. 8710b57cec5SDimitry Andric if (Tok.is(tok::verbatim_line_text)) { 8720b57cec5SDimitry Andric TextBegin = Tok.getLocation(); 8730b57cec5SDimitry Andric Text = Tok.getVerbatimLineText(); 8740b57cec5SDimitry Andric } else { 8750b57cec5SDimitry Andric TextBegin = NameTok.getEndLocation(); 8760b57cec5SDimitry Andric Text = ""; 8770b57cec5SDimitry Andric } 8780b57cec5SDimitry Andric 8790b57cec5SDimitry Andric VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), 8800b57cec5SDimitry Andric NameTok.getVerbatimLineID(), 8810b57cec5SDimitry Andric TextBegin, 8820b57cec5SDimitry Andric Text); 8830b57cec5SDimitry Andric consumeToken(); 8840b57cec5SDimitry Andric return VL; 8850b57cec5SDimitry Andric } 8860b57cec5SDimitry Andric 8870b57cec5SDimitry Andric BlockContentComment *Parser::parseBlockContent() { 8880b57cec5SDimitry Andric switch (Tok.getKind()) { 8890b57cec5SDimitry Andric case tok::text: 8900b57cec5SDimitry Andric case tok::unknown_command: 8910b57cec5SDimitry Andric case tok::backslash_command: 8920b57cec5SDimitry Andric case tok::at_command: 8930b57cec5SDimitry Andric case tok::html_start_tag: 8940b57cec5SDimitry Andric case tok::html_end_tag: 8950b57cec5SDimitry Andric return parseParagraphOrBlockCommand(); 8960b57cec5SDimitry Andric 8970b57cec5SDimitry Andric case tok::verbatim_block_begin: 8980b57cec5SDimitry Andric return parseVerbatimBlock(); 8990b57cec5SDimitry Andric 9000b57cec5SDimitry Andric case tok::verbatim_line_name: 9010b57cec5SDimitry Andric return parseVerbatimLine(); 9020b57cec5SDimitry Andric 9030b57cec5SDimitry Andric case tok::eof: 9040b57cec5SDimitry Andric case tok::newline: 9050b57cec5SDimitry Andric case tok::verbatim_block_line: 9060b57cec5SDimitry Andric case tok::verbatim_block_end: 9070b57cec5SDimitry Andric case tok::verbatim_line_text: 9080b57cec5SDimitry Andric case tok::html_ident: 9090b57cec5SDimitry Andric case tok::html_equals: 9100b57cec5SDimitry Andric case tok::html_quoted_string: 9110b57cec5SDimitry Andric case tok::html_greater: 9120b57cec5SDimitry Andric case tok::html_slash_greater: 9130b57cec5SDimitry Andric llvm_unreachable("should not see this token"); 9140b57cec5SDimitry Andric } 9150b57cec5SDimitry Andric llvm_unreachable("bogus token kind"); 9160b57cec5SDimitry Andric } 9170b57cec5SDimitry Andric 9180b57cec5SDimitry Andric FullComment *Parser::parseFullComment() { 9190b57cec5SDimitry Andric // Skip newlines at the beginning of the comment. 9200b57cec5SDimitry Andric while (Tok.is(tok::newline)) 9210b57cec5SDimitry Andric consumeToken(); 9220b57cec5SDimitry Andric 9230b57cec5SDimitry Andric SmallVector<BlockContentComment *, 8> Blocks; 9240b57cec5SDimitry Andric while (Tok.isNot(tok::eof)) { 9250b57cec5SDimitry Andric Blocks.push_back(parseBlockContent()); 9260b57cec5SDimitry Andric 9270b57cec5SDimitry Andric // Skip extra newlines after paragraph end. 9280b57cec5SDimitry Andric while (Tok.is(tok::newline)) 9290b57cec5SDimitry Andric consumeToken(); 9300b57cec5SDimitry Andric } 931bdd1243dSDimitry Andric return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks))); 9320b57cec5SDimitry Andric } 9330b57cec5SDimitry Andric 9340b57cec5SDimitry Andric } // end namespace comments 9350b57cec5SDimitry Andric } // end namespace clang 936