10b57cec5SDimitry Andric //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric
90b57cec5SDimitry Andric #include "clang/AST/RawCommentList.h"
100b57cec5SDimitry Andric #include "clang/AST/ASTContext.h"
110b57cec5SDimitry Andric #include "clang/AST/Comment.h"
120b57cec5SDimitry Andric #include "clang/AST/CommentBriefParser.h"
130b57cec5SDimitry Andric #include "clang/AST/CommentCommandTraits.h"
140b57cec5SDimitry Andric #include "clang/AST/CommentLexer.h"
150b57cec5SDimitry Andric #include "clang/AST/CommentParser.h"
160b57cec5SDimitry Andric #include "clang/AST/CommentSema.h"
170b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h"
180b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
1981ad6265SDimitry Andric #include "llvm/ADT/StringExtras.h"
205ffd83dbSDimitry Andric #include "llvm/Support/Allocator.h"
210b57cec5SDimitry Andric
220b57cec5SDimitry Andric using namespace clang;
230b57cec5SDimitry Andric
240b57cec5SDimitry Andric namespace {
250b57cec5SDimitry Andric /// Get comment kind and bool describing if it is a trailing comment.
getCommentKind(StringRef Comment,bool ParseAllComments)260b57cec5SDimitry Andric std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
270b57cec5SDimitry Andric bool ParseAllComments) {
280b57cec5SDimitry Andric const size_t MinCommentLength = ParseAllComments ? 2 : 3;
290b57cec5SDimitry Andric if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
300b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_Invalid, false);
310b57cec5SDimitry Andric
320b57cec5SDimitry Andric RawComment::CommentKind K;
330b57cec5SDimitry Andric if (Comment[1] == '/') {
340b57cec5SDimitry Andric if (Comment.size() < 3)
350b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
360b57cec5SDimitry Andric
370b57cec5SDimitry Andric if (Comment[2] == '/')
380b57cec5SDimitry Andric K = RawComment::RCK_BCPLSlash;
390b57cec5SDimitry Andric else if (Comment[2] == '!')
400b57cec5SDimitry Andric K = RawComment::RCK_BCPLExcl;
410b57cec5SDimitry Andric else
420b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
430b57cec5SDimitry Andric } else {
440b57cec5SDimitry Andric assert(Comment.size() >= 4);
450b57cec5SDimitry Andric
460b57cec5SDimitry Andric // Comment lexer does not understand escapes in comment markers, so pretend
470b57cec5SDimitry Andric // that this is not a comment.
480b57cec5SDimitry Andric if (Comment[1] != '*' ||
490b57cec5SDimitry Andric Comment[Comment.size() - 2] != '*' ||
500b57cec5SDimitry Andric Comment[Comment.size() - 1] != '/')
510b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_Invalid, false);
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric if (Comment[2] == '*')
540b57cec5SDimitry Andric K = RawComment::RCK_JavaDoc;
550b57cec5SDimitry Andric else if (Comment[2] == '!')
560b57cec5SDimitry Andric K = RawComment::RCK_Qt;
570b57cec5SDimitry Andric else
580b57cec5SDimitry Andric return std::make_pair(RawComment::RCK_OrdinaryC, false);
590b57cec5SDimitry Andric }
600b57cec5SDimitry Andric const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
610b57cec5SDimitry Andric return std::make_pair(K, TrailingComment);
620b57cec5SDimitry Andric }
630b57cec5SDimitry Andric
mergedCommentIsTrailingComment(StringRef Comment)640b57cec5SDimitry Andric bool mergedCommentIsTrailingComment(StringRef Comment) {
650b57cec5SDimitry Andric return (Comment.size() > 3) && (Comment[3] == '<');
660b57cec5SDimitry Andric }
670b57cec5SDimitry Andric
680b57cec5SDimitry Andric /// Returns true if R1 and R2 both have valid locations that start on the same
690b57cec5SDimitry Andric /// column.
commentsStartOnSameColumn(const SourceManager & SM,const RawComment & R1,const RawComment & R2)700b57cec5SDimitry Andric bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
710b57cec5SDimitry Andric const RawComment &R2) {
720b57cec5SDimitry Andric SourceLocation L1 = R1.getBeginLoc();
730b57cec5SDimitry Andric SourceLocation L2 = R2.getBeginLoc();
740b57cec5SDimitry Andric bool Invalid = false;
750b57cec5SDimitry Andric unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
760b57cec5SDimitry Andric if (!Invalid) {
770b57cec5SDimitry Andric unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
780b57cec5SDimitry Andric return !Invalid && (C1 == C2);
790b57cec5SDimitry Andric }
800b57cec5SDimitry Andric return false;
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric } // unnamed namespace
830b57cec5SDimitry Andric
840b57cec5SDimitry Andric /// Determines whether there is only whitespace in `Buffer` between `P`
850b57cec5SDimitry Andric /// and the previous line.
860b57cec5SDimitry Andric /// \param Buffer The buffer to search in.
870b57cec5SDimitry Andric /// \param P The offset from the beginning of `Buffer` to start from.
880b57cec5SDimitry Andric /// \return true if all of the characters in `Buffer` ranging from the closest
890b57cec5SDimitry Andric /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
900b57cec5SDimitry Andric /// are whitespace.
onlyWhitespaceOnLineBefore(const char * Buffer,unsigned P)910b57cec5SDimitry Andric static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
920b57cec5SDimitry Andric // Search backwards until we see linefeed or carriage return.
930b57cec5SDimitry Andric for (unsigned I = P; I != 0; --I) {
940b57cec5SDimitry Andric char C = Buffer[I - 1];
950b57cec5SDimitry Andric if (isVerticalWhitespace(C))
960b57cec5SDimitry Andric return true;
970b57cec5SDimitry Andric if (!isHorizontalWhitespace(C))
980b57cec5SDimitry Andric return false;
990b57cec5SDimitry Andric }
1000b57cec5SDimitry Andric // We hit the beginning of the buffer.
1010b57cec5SDimitry Andric return true;
1020b57cec5SDimitry Andric }
1030b57cec5SDimitry Andric
1040b57cec5SDimitry Andric /// Returns whether `K` is an ordinary comment kind.
isOrdinaryKind(RawComment::CommentKind K)1050b57cec5SDimitry Andric static bool isOrdinaryKind(RawComment::CommentKind K) {
1060b57cec5SDimitry Andric return (K == RawComment::RCK_OrdinaryBCPL) ||
1070b57cec5SDimitry Andric (K == RawComment::RCK_OrdinaryC);
1080b57cec5SDimitry Andric }
1090b57cec5SDimitry Andric
RawComment(const SourceManager & SourceMgr,SourceRange SR,const CommentOptions & CommentOpts,bool Merged)1100b57cec5SDimitry Andric RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
1110b57cec5SDimitry Andric const CommentOptions &CommentOpts, bool Merged) :
1120b57cec5SDimitry Andric Range(SR), RawTextValid(false), BriefTextValid(false),
1130b57cec5SDimitry Andric IsAttached(false), IsTrailingComment(false),
1140b57cec5SDimitry Andric IsAlmostTrailingComment(false) {
1150b57cec5SDimitry Andric // Extract raw comment text, if possible.
1160b57cec5SDimitry Andric if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
1170b57cec5SDimitry Andric Kind = RCK_Invalid;
1180b57cec5SDimitry Andric return;
1190b57cec5SDimitry Andric }
1200b57cec5SDimitry Andric
1210b57cec5SDimitry Andric // Guess comment kind.
1220b57cec5SDimitry Andric std::pair<CommentKind, bool> K =
1230b57cec5SDimitry Andric getCommentKind(RawText, CommentOpts.ParseAllComments);
1240b57cec5SDimitry Andric
1250b57cec5SDimitry Andric // Guess whether an ordinary comment is trailing.
1260b57cec5SDimitry Andric if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
1270b57cec5SDimitry Andric FileID BeginFileID;
1280b57cec5SDimitry Andric unsigned BeginOffset;
1290b57cec5SDimitry Andric std::tie(BeginFileID, BeginOffset) =
1300b57cec5SDimitry Andric SourceMgr.getDecomposedLoc(Range.getBegin());
1310b57cec5SDimitry Andric if (BeginOffset != 0) {
1320b57cec5SDimitry Andric bool Invalid = false;
1330b57cec5SDimitry Andric const char *Buffer =
1340b57cec5SDimitry Andric SourceMgr.getBufferData(BeginFileID, &Invalid).data();
1350b57cec5SDimitry Andric IsTrailingComment |=
1360b57cec5SDimitry Andric (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
1370b57cec5SDimitry Andric }
1380b57cec5SDimitry Andric }
1390b57cec5SDimitry Andric
1400b57cec5SDimitry Andric if (!Merged) {
1410b57cec5SDimitry Andric Kind = K.first;
1420b57cec5SDimitry Andric IsTrailingComment |= K.second;
1430b57cec5SDimitry Andric
144*5f757f3fSDimitry Andric IsAlmostTrailingComment =
145*5f757f3fSDimitry Andric RawText.starts_with("//<") || RawText.starts_with("/*<");
1460b57cec5SDimitry Andric } else {
1470b57cec5SDimitry Andric Kind = RCK_Merged;
1480b57cec5SDimitry Andric IsTrailingComment =
1490b57cec5SDimitry Andric IsTrailingComment || mergedCommentIsTrailingComment(RawText);
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric }
1520b57cec5SDimitry Andric
getRawTextSlow(const SourceManager & SourceMgr) const1530b57cec5SDimitry Andric StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
1540b57cec5SDimitry Andric FileID BeginFileID;
1550b57cec5SDimitry Andric FileID EndFileID;
1560b57cec5SDimitry Andric unsigned BeginOffset;
1570b57cec5SDimitry Andric unsigned EndOffset;
1580b57cec5SDimitry Andric
1590b57cec5SDimitry Andric std::tie(BeginFileID, BeginOffset) =
1600b57cec5SDimitry Andric SourceMgr.getDecomposedLoc(Range.getBegin());
1610b57cec5SDimitry Andric std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
1620b57cec5SDimitry Andric
1630b57cec5SDimitry Andric const unsigned Length = EndOffset - BeginOffset;
1640b57cec5SDimitry Andric if (Length < 2)
1650b57cec5SDimitry Andric return StringRef();
1660b57cec5SDimitry Andric
1670b57cec5SDimitry Andric // The comment can't begin in one file and end in another.
1680b57cec5SDimitry Andric assert(BeginFileID == EndFileID);
1690b57cec5SDimitry Andric
1700b57cec5SDimitry Andric bool Invalid = false;
1710b57cec5SDimitry Andric const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
1720b57cec5SDimitry Andric &Invalid).data();
1730b57cec5SDimitry Andric if (Invalid)
1740b57cec5SDimitry Andric return StringRef();
1750b57cec5SDimitry Andric
1760b57cec5SDimitry Andric return StringRef(BufferStart + BeginOffset, Length);
1770b57cec5SDimitry Andric }
1780b57cec5SDimitry Andric
extractBriefText(const ASTContext & Context) const1790b57cec5SDimitry Andric const char *RawComment::extractBriefText(const ASTContext &Context) const {
1800b57cec5SDimitry Andric // Lazily initialize RawText using the accessor before using it.
1810b57cec5SDimitry Andric (void)getRawText(Context.getSourceManager());
1820b57cec5SDimitry Andric
1830b57cec5SDimitry Andric // Since we will be copying the resulting text, all allocations made during
1840b57cec5SDimitry Andric // parsing are garbage after resulting string is formed. Thus we can use
1850b57cec5SDimitry Andric // a separate allocator for all temporary stuff.
1860b57cec5SDimitry Andric llvm::BumpPtrAllocator Allocator;
1870b57cec5SDimitry Andric
1880b57cec5SDimitry Andric comments::Lexer L(Allocator, Context.getDiagnostics(),
1890b57cec5SDimitry Andric Context.getCommentCommandTraits(),
1900b57cec5SDimitry Andric Range.getBegin(),
1910b57cec5SDimitry Andric RawText.begin(), RawText.end());
1920b57cec5SDimitry Andric comments::BriefParser P(L, Context.getCommentCommandTraits());
1930b57cec5SDimitry Andric
1940b57cec5SDimitry Andric const std::string Result = P.Parse();
1950b57cec5SDimitry Andric const unsigned BriefTextLength = Result.size();
1960b57cec5SDimitry Andric char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
1970b57cec5SDimitry Andric memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
1980b57cec5SDimitry Andric BriefText = BriefTextPtr;
1990b57cec5SDimitry Andric BriefTextValid = true;
2000b57cec5SDimitry Andric
2010b57cec5SDimitry Andric return BriefTextPtr;
2020b57cec5SDimitry Andric }
2030b57cec5SDimitry Andric
parse(const ASTContext & Context,const Preprocessor * PP,const Decl * D) const2040b57cec5SDimitry Andric comments::FullComment *RawComment::parse(const ASTContext &Context,
2050b57cec5SDimitry Andric const Preprocessor *PP,
2060b57cec5SDimitry Andric const Decl *D) const {
2070b57cec5SDimitry Andric // Lazily initialize RawText using the accessor before using it.
2080b57cec5SDimitry Andric (void)getRawText(Context.getSourceManager());
2090b57cec5SDimitry Andric
2100b57cec5SDimitry Andric comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
2110b57cec5SDimitry Andric Context.getCommentCommandTraits(),
2120b57cec5SDimitry Andric getSourceRange().getBegin(),
2130b57cec5SDimitry Andric RawText.begin(), RawText.end());
2140b57cec5SDimitry Andric comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
2150b57cec5SDimitry Andric Context.getDiagnostics(),
2160b57cec5SDimitry Andric Context.getCommentCommandTraits(),
2170b57cec5SDimitry Andric PP);
2180b57cec5SDimitry Andric S.setDecl(D);
2190b57cec5SDimitry Andric comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
2200b57cec5SDimitry Andric Context.getDiagnostics(),
2210b57cec5SDimitry Andric Context.getCommentCommandTraits());
2220b57cec5SDimitry Andric
2230b57cec5SDimitry Andric return P.parseFullComment();
2240b57cec5SDimitry Andric }
2250b57cec5SDimitry Andric
onlyWhitespaceBetween(SourceManager & SM,SourceLocation Loc1,SourceLocation Loc2,unsigned MaxNewlinesAllowed)2260b57cec5SDimitry Andric static bool onlyWhitespaceBetween(SourceManager &SM,
2270b57cec5SDimitry Andric SourceLocation Loc1, SourceLocation Loc2,
2280b57cec5SDimitry Andric unsigned MaxNewlinesAllowed) {
2290b57cec5SDimitry Andric std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
2300b57cec5SDimitry Andric std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
2310b57cec5SDimitry Andric
2320b57cec5SDimitry Andric // Question does not make sense if locations are in different files.
2330b57cec5SDimitry Andric if (Loc1Info.first != Loc2Info.first)
2340b57cec5SDimitry Andric return false;
2350b57cec5SDimitry Andric
2360b57cec5SDimitry Andric bool Invalid = false;
2370b57cec5SDimitry Andric const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
2380b57cec5SDimitry Andric if (Invalid)
2390b57cec5SDimitry Andric return false;
2400b57cec5SDimitry Andric
2410b57cec5SDimitry Andric unsigned NumNewlines = 0;
2420b57cec5SDimitry Andric assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
2430b57cec5SDimitry Andric // Look for non-whitespace characters and remember any newlines seen.
2440b57cec5SDimitry Andric for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
2450b57cec5SDimitry Andric switch (Buffer[I]) {
2460b57cec5SDimitry Andric default:
2470b57cec5SDimitry Andric return false;
2480b57cec5SDimitry Andric case ' ':
2490b57cec5SDimitry Andric case '\t':
2500b57cec5SDimitry Andric case '\f':
2510b57cec5SDimitry Andric case '\v':
2520b57cec5SDimitry Andric break;
2530b57cec5SDimitry Andric case '\r':
2540b57cec5SDimitry Andric case '\n':
2550b57cec5SDimitry Andric ++NumNewlines;
2560b57cec5SDimitry Andric
2570b57cec5SDimitry Andric // Check if we have found more than the maximum allowed number of
2580b57cec5SDimitry Andric // newlines.
2590b57cec5SDimitry Andric if (NumNewlines > MaxNewlinesAllowed)
2600b57cec5SDimitry Andric return false;
2610b57cec5SDimitry Andric
2620b57cec5SDimitry Andric // Collapse \r\n and \n\r into a single newline.
2630b57cec5SDimitry Andric if (I + 1 != Loc2Info.second &&
2640b57cec5SDimitry Andric (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
2650b57cec5SDimitry Andric Buffer[I] != Buffer[I + 1])
2660b57cec5SDimitry Andric ++I;
2670b57cec5SDimitry Andric break;
2680b57cec5SDimitry Andric }
2690b57cec5SDimitry Andric }
2700b57cec5SDimitry Andric
2710b57cec5SDimitry Andric return true;
2720b57cec5SDimitry Andric }
2730b57cec5SDimitry Andric
addComment(const RawComment & RC,const CommentOptions & CommentOpts,llvm::BumpPtrAllocator & Allocator)2740b57cec5SDimitry Andric void RawCommentList::addComment(const RawComment &RC,
2750b57cec5SDimitry Andric const CommentOptions &CommentOpts,
2760b57cec5SDimitry Andric llvm::BumpPtrAllocator &Allocator) {
2770b57cec5SDimitry Andric if (RC.isInvalid())
2780b57cec5SDimitry Andric return;
2790b57cec5SDimitry Andric
2800b57cec5SDimitry Andric // Ordinary comments are not interesting for us.
2810b57cec5SDimitry Andric if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
2820b57cec5SDimitry Andric return;
2830b57cec5SDimitry Andric
284a7dea167SDimitry Andric std::pair<FileID, unsigned> Loc =
285a7dea167SDimitry Andric SourceMgr.getDecomposedLoc(RC.getBeginLoc());
286a7dea167SDimitry Andric
287a7dea167SDimitry Andric const FileID CommentFile = Loc.first;
288a7dea167SDimitry Andric const unsigned CommentOffset = Loc.second;
289a7dea167SDimitry Andric
2900b57cec5SDimitry Andric // If this is the first Doxygen comment, save it (because there isn't
2910b57cec5SDimitry Andric // anything to merge it with).
292a7dea167SDimitry Andric if (OrderedComments[CommentFile].empty()) {
293a7dea167SDimitry Andric OrderedComments[CommentFile][CommentOffset] =
294a7dea167SDimitry Andric new (Allocator) RawComment(RC);
2950b57cec5SDimitry Andric return;
2960b57cec5SDimitry Andric }
2970b57cec5SDimitry Andric
298a7dea167SDimitry Andric const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second;
2990b57cec5SDimitry Andric const RawComment &C2 = RC;
3000b57cec5SDimitry Andric
3010b57cec5SDimitry Andric // Merge comments only if there is only whitespace between them.
3020b57cec5SDimitry Andric // Can't merge trailing and non-trailing comments unless the second is
3030b57cec5SDimitry Andric // non-trailing ordinary in the same column, as in the case:
3040b57cec5SDimitry Andric // int x; // documents x
3050b57cec5SDimitry Andric // // more text
3060b57cec5SDimitry Andric // versus:
3070b57cec5SDimitry Andric // int x; // documents x
3080b57cec5SDimitry Andric // int y; // documents y
3090b57cec5SDimitry Andric // or:
3100b57cec5SDimitry Andric // int x; // documents x
3110b57cec5SDimitry Andric // // documents y
3120b57cec5SDimitry Andric // int y;
3130b57cec5SDimitry Andric // Merge comments if they are on same or consecutive lines.
3140b57cec5SDimitry Andric if ((C1.isTrailingComment() == C2.isTrailingComment() ||
3150b57cec5SDimitry Andric (C1.isTrailingComment() && !C2.isTrailingComment() &&
3160b57cec5SDimitry Andric isOrdinaryKind(C2.getKind()) &&
3170b57cec5SDimitry Andric commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
3180b57cec5SDimitry Andric onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
3190b57cec5SDimitry Andric /*MaxNewlinesAllowed=*/1)) {
3200b57cec5SDimitry Andric SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
321a7dea167SDimitry Andric *OrderedComments[CommentFile].rbegin()->second =
322a7dea167SDimitry Andric RawComment(SourceMgr, MergedRange, CommentOpts, true);
3230b57cec5SDimitry Andric } else {
324a7dea167SDimitry Andric OrderedComments[CommentFile][CommentOffset] =
325a7dea167SDimitry Andric new (Allocator) RawComment(RC);
3260b57cec5SDimitry Andric }
3270b57cec5SDimitry Andric }
3280b57cec5SDimitry Andric
329a7dea167SDimitry Andric const std::map<unsigned, RawComment *> *
getCommentsInFile(FileID File) const330a7dea167SDimitry Andric RawCommentList::getCommentsInFile(FileID File) const {
331a7dea167SDimitry Andric auto CommentsInFile = OrderedComments.find(File);
332a7dea167SDimitry Andric if (CommentsInFile == OrderedComments.end())
333a7dea167SDimitry Andric return nullptr;
3340b57cec5SDimitry Andric
335a7dea167SDimitry Andric return &CommentsInFile->second;
336a7dea167SDimitry Andric }
337a7dea167SDimitry Andric
empty() const338a7dea167SDimitry Andric bool RawCommentList::empty() const { return OrderedComments.empty(); }
339a7dea167SDimitry Andric
getCommentBeginLine(RawComment * C,FileID File,unsigned Offset) const340a7dea167SDimitry Andric unsigned RawCommentList::getCommentBeginLine(RawComment *C, FileID File,
341a7dea167SDimitry Andric unsigned Offset) const {
342a7dea167SDimitry Andric auto Cached = CommentBeginLine.find(C);
343a7dea167SDimitry Andric if (Cached != CommentBeginLine.end())
344a7dea167SDimitry Andric return Cached->second;
345a7dea167SDimitry Andric const unsigned Line = SourceMgr.getLineNumber(File, Offset);
346a7dea167SDimitry Andric CommentBeginLine[C] = Line;
347a7dea167SDimitry Andric return Line;
348a7dea167SDimitry Andric }
349a7dea167SDimitry Andric
getCommentEndOffset(RawComment * C) const350a7dea167SDimitry Andric unsigned RawCommentList::getCommentEndOffset(RawComment *C) const {
351a7dea167SDimitry Andric auto Cached = CommentEndOffset.find(C);
352a7dea167SDimitry Andric if (Cached != CommentEndOffset.end())
353a7dea167SDimitry Andric return Cached->second;
354a7dea167SDimitry Andric const unsigned Offset =
355a7dea167SDimitry Andric SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second;
356a7dea167SDimitry Andric CommentEndOffset[C] = Offset;
357a7dea167SDimitry Andric return Offset;
3580b57cec5SDimitry Andric }
3590b57cec5SDimitry Andric
getFormattedText(const SourceManager & SourceMgr,DiagnosticsEngine & Diags) const3600b57cec5SDimitry Andric std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
3610b57cec5SDimitry Andric DiagnosticsEngine &Diags) const {
3620b57cec5SDimitry Andric llvm::StringRef CommentText = getRawText(SourceMgr);
3630b57cec5SDimitry Andric if (CommentText.empty())
3640b57cec5SDimitry Andric return "";
3650b57cec5SDimitry Andric
36681ad6265SDimitry Andric std::string Result;
36781ad6265SDimitry Andric for (const RawComment::CommentLine &Line :
36881ad6265SDimitry Andric getFormattedLines(SourceMgr, Diags))
36981ad6265SDimitry Andric Result += Line.Text + "\n";
37081ad6265SDimitry Andric
37181ad6265SDimitry Andric auto LastChar = Result.find_last_not_of('\n');
37281ad6265SDimitry Andric Result.erase(LastChar + 1, Result.size());
37381ad6265SDimitry Andric
37481ad6265SDimitry Andric return Result;
37581ad6265SDimitry Andric }
37681ad6265SDimitry Andric
37781ad6265SDimitry Andric std::vector<RawComment::CommentLine>
getFormattedLines(const SourceManager & SourceMgr,DiagnosticsEngine & Diags) const37881ad6265SDimitry Andric RawComment::getFormattedLines(const SourceManager &SourceMgr,
37981ad6265SDimitry Andric DiagnosticsEngine &Diags) const {
38081ad6265SDimitry Andric llvm::StringRef CommentText = getRawText(SourceMgr);
38181ad6265SDimitry Andric if (CommentText.empty())
38281ad6265SDimitry Andric return {};
38381ad6265SDimitry Andric
3840b57cec5SDimitry Andric llvm::BumpPtrAllocator Allocator;
3850b57cec5SDimitry Andric // We do not parse any commands, so CommentOptions are ignored by
3860b57cec5SDimitry Andric // comments::Lexer. Therefore, we just use default-constructed options.
3870b57cec5SDimitry Andric CommentOptions DefOpts;
3880b57cec5SDimitry Andric comments::CommandTraits EmptyTraits(Allocator, DefOpts);
3890b57cec5SDimitry Andric comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
3900b57cec5SDimitry Andric CommentText.begin(), CommentText.end(),
3910b57cec5SDimitry Andric /*ParseCommands=*/false);
3920b57cec5SDimitry Andric
39381ad6265SDimitry Andric std::vector<RawComment::CommentLine> Result;
3940b57cec5SDimitry Andric // A column number of the first non-whitespace token in the comment text.
3950b57cec5SDimitry Andric // We skip whitespace up to this column, but keep the whitespace after this
3960b57cec5SDimitry Andric // column. IndentColumn is calculated when lexing the first line and reused
3970b57cec5SDimitry Andric // for the rest of lines.
3980b57cec5SDimitry Andric unsigned IndentColumn = 0;
3990b57cec5SDimitry Andric
40081ad6265SDimitry Andric // Record the line number of the last processed comment line.
40181ad6265SDimitry Andric // For block-style comments, an extra newline token will be produced after
40281ad6265SDimitry Andric // the end-comment marker, e.g.:
40381ad6265SDimitry Andric // /** This is a multi-line comment block.
40481ad6265SDimitry Andric // The lexer will produce two newline tokens here > */
40581ad6265SDimitry Andric // previousLine will record the line number when we previously saw a newline
40681ad6265SDimitry Andric // token and recorded a comment line. If we see another newline token on the
40781ad6265SDimitry Andric // same line, don't record anything in between.
40881ad6265SDimitry Andric unsigned PreviousLine = 0;
40981ad6265SDimitry Andric
4100b57cec5SDimitry Andric // Processes one line of the comment and adds it to the result.
4110b57cec5SDimitry Andric // Handles skipping the indent at the start of the line.
4120b57cec5SDimitry Andric // Returns false when eof is reached and true otherwise.
4130b57cec5SDimitry Andric auto LexLine = [&](bool IsFirstLine) -> bool {
4140b57cec5SDimitry Andric comments::Token Tok;
4150b57cec5SDimitry Andric // Lex the first token on the line. We handle it separately, because we to
4160b57cec5SDimitry Andric // fix up its indentation.
4170b57cec5SDimitry Andric L.lex(Tok);
4180b57cec5SDimitry Andric if (Tok.is(comments::tok::eof))
4190b57cec5SDimitry Andric return false;
4200b57cec5SDimitry Andric if (Tok.is(comments::tok::newline)) {
42181ad6265SDimitry Andric PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation());
42281ad6265SDimitry Andric if (Loc.getLine() != PreviousLine) {
42381ad6265SDimitry Andric Result.emplace_back("", Loc, Loc);
42481ad6265SDimitry Andric PreviousLine = Loc.getLine();
42581ad6265SDimitry Andric }
4260b57cec5SDimitry Andric return true;
4270b57cec5SDimitry Andric }
42881ad6265SDimitry Andric SmallString<124> Line;
4290b57cec5SDimitry Andric llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
4300b57cec5SDimitry Andric bool LocInvalid = false;
4310b57cec5SDimitry Andric unsigned TokColumn =
4320b57cec5SDimitry Andric SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
4330b57cec5SDimitry Andric assert(!LocInvalid && "getFormattedText for invalid location");
4340b57cec5SDimitry Andric
4350b57cec5SDimitry Andric // Amount of leading whitespace in TokText.
4360b57cec5SDimitry Andric size_t WhitespaceLen = TokText.find_first_not_of(" \t");
4370b57cec5SDimitry Andric if (WhitespaceLen == StringRef::npos)
4380b57cec5SDimitry Andric WhitespaceLen = TokText.size();
4390b57cec5SDimitry Andric // Remember the amount of whitespace we skipped in the first line to remove
4400b57cec5SDimitry Andric // indent up to that column in the following lines.
4410b57cec5SDimitry Andric if (IsFirstLine)
4420b57cec5SDimitry Andric IndentColumn = TokColumn + WhitespaceLen;
4430b57cec5SDimitry Andric
4440b57cec5SDimitry Andric // Amount of leading whitespace we actually want to skip.
4450b57cec5SDimitry Andric // For the first line we skip all the whitespace.
4460b57cec5SDimitry Andric // For the rest of the lines, we skip whitespace up to IndentColumn.
4470b57cec5SDimitry Andric unsigned SkipLen =
4480b57cec5SDimitry Andric IsFirstLine
4490b57cec5SDimitry Andric ? WhitespaceLen
4500b57cec5SDimitry Andric : std::min<size_t>(
4510b57cec5SDimitry Andric WhitespaceLen,
4520b57cec5SDimitry Andric std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
4530b57cec5SDimitry Andric llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
45481ad6265SDimitry Andric Line += Trimmed;
45581ad6265SDimitry Andric // Get the beginning location of the adjusted comment line.
45681ad6265SDimitry Andric PresumedLoc Begin =
45781ad6265SDimitry Andric SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen));
45881ad6265SDimitry Andric
4590b57cec5SDimitry Andric // Lex all tokens in the rest of the line.
4600b57cec5SDimitry Andric for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
4610b57cec5SDimitry Andric if (Tok.is(comments::tok::newline)) {
46281ad6265SDimitry Andric // Get the ending location of the comment line.
46381ad6265SDimitry Andric PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
46481ad6265SDimitry Andric if (End.getLine() != PreviousLine) {
46581ad6265SDimitry Andric Result.emplace_back(Line, Begin, End);
46681ad6265SDimitry Andric PreviousLine = End.getLine();
46781ad6265SDimitry Andric }
4680b57cec5SDimitry Andric return true;
4690b57cec5SDimitry Andric }
47081ad6265SDimitry Andric Line += L.getSpelling(Tok, SourceMgr);
4710b57cec5SDimitry Andric }
47281ad6265SDimitry Andric PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
47381ad6265SDimitry Andric Result.emplace_back(Line, Begin, End);
4740b57cec5SDimitry Andric // We've reached the end of file token.
4750b57cec5SDimitry Andric return false;
4760b57cec5SDimitry Andric };
4770b57cec5SDimitry Andric
4780b57cec5SDimitry Andric // Process first line separately to remember indent for the following lines.
47981ad6265SDimitry Andric if (!LexLine(/*IsFirstLine=*/true))
4800b57cec5SDimitry Andric return Result;
4810b57cec5SDimitry Andric // Process the rest of the lines.
4820b57cec5SDimitry Andric while (LexLine(/*IsFirstLine=*/false))
4830b57cec5SDimitry Andric ;
4840b57cec5SDimitry Andric return Result;
4850b57cec5SDimitry Andric }
486