lib/AST/RawCommentList.cpp

//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
e5dd7070Spatrick
e5dd7070Spatrick#include "clang/AST/RawCommentList.h"
e5dd7070Spatrick#include "clang/AST/ASTContext.h"
e5dd7070Spatrick#include "clang/AST/Comment.h"
e5dd7070Spatrick#include "clang/AST/CommentBriefParser.h"
e5dd7070Spatrick#include "clang/AST/CommentCommandTraits.h"
e5dd7070Spatrick#include "clang/AST/CommentLexer.h"
e5dd7070Spatrick#include "clang/AST/CommentParser.h"
e5dd7070Spatrick#include "clang/AST/CommentSema.h"
e5dd7070Spatrick#include "clang/Basic/CharInfo.h"
e5dd7070Spatrick#include "llvm/ADT/STLExtras.h"
*12c85518Srobert#include "llvm/ADT/StringExtras.h"
ec727ea7Spatrick#include "llvm/Support/Allocator.h"
e5dd7070Spatrick
e5dd7070Spatrickusing namespace clang;
e5dd7070Spatrick
e5dd7070Spatricknamespace {
e5dd7070Spatrick/// Get comment kind and bool describing if it is a trailing comment.
e5dd7070Spatrickstd::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
e5dd7070Spatrick                                                        bool ParseAllComments) {
e5dd7070Spatrick  const size_t MinCommentLength = ParseAllComments ? 2 : 3;
e5dd7070Spatrick  if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
e5dd7070Spatrick    return std::make_pair(RawComment::RCK_Invalid, false);
e5dd7070Spatrick
e5dd7070Spatrick  RawComment::CommentKind K;
e5dd7070Spatrick  if (Comment[1] == '/') {
e5dd7070Spatrick    if (Comment.size() < 3)
e5dd7070Spatrick      return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
e5dd7070Spatrick
e5dd7070Spatrick    if (Comment[2] == '/')
e5dd7070Spatrick      K = RawComment::RCK_BCPLSlash;
e5dd7070Spatrick    else if (Comment[2] == '!')
e5dd7070Spatrick      K = RawComment::RCK_BCPLExcl;
e5dd7070Spatrick    else
e5dd7070Spatrick      return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
e5dd7070Spatrick  } else {
e5dd7070Spatrick    assert(Comment.size() >= 4);
e5dd7070Spatrick
e5dd7070Spatrick    // Comment lexer does not understand escapes in comment markers, so pretend
e5dd7070Spatrick    // that this is not a comment.
e5dd7070Spatrick    if (Comment[1] != '*' ||
e5dd7070Spatrick        Comment[Comment.size() - 2] != '*' ||
e5dd7070Spatrick        Comment[Comment.size() - 1] != '/')
e5dd7070Spatrick      return std::make_pair(RawComment::RCK_Invalid, false);
e5dd7070Spatrick
e5dd7070Spatrick    if (Comment[2] == '*')
e5dd7070Spatrick      K = RawComment::RCK_JavaDoc;
e5dd7070Spatrick    else if (Comment[2] == '!')
e5dd7070Spatrick      K = RawComment::RCK_Qt;
e5dd7070Spatrick    else
e5dd7070Spatrick      return std::make_pair(RawComment::RCK_OrdinaryC, false);
e5dd7070Spatrick  }
e5dd7070Spatrick  const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
e5dd7070Spatrick  return std::make_pair(K, TrailingComment);
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickbool mergedCommentIsTrailingComment(StringRef Comment) {
e5dd7070Spatrick  return (Comment.size() > 3) && (Comment[3] == '<');
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrick/// Returns true if R1 and R2 both have valid locations that start on the same
e5dd7070Spatrick/// column.
e5dd7070Spatrickbool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
e5dd7070Spatrick                               const RawComment &R2) {
e5dd7070Spatrick  SourceLocation L1 = R1.getBeginLoc();
e5dd7070Spatrick  SourceLocation L2 = R2.getBeginLoc();
e5dd7070Spatrick  bool Invalid = false;
e5dd7070Spatrick  unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
e5dd7070Spatrick  if (!Invalid) {
e5dd7070Spatrick    unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
e5dd7070Spatrick    return !Invalid && (C1 == C2);
e5dd7070Spatrick  }
e5dd7070Spatrick  return false;
e5dd7070Spatrick}
e5dd7070Spatrick} // unnamed namespace
e5dd7070Spatrick
e5dd7070Spatrick/// Determines whether there is only whitespace in `Buffer` between `P`
e5dd7070Spatrick/// and the previous line.
e5dd7070Spatrick/// \param Buffer The buffer to search in.
e5dd7070Spatrick/// \param P The offset from the beginning of `Buffer` to start from.
e5dd7070Spatrick/// \return true if all of the characters in `Buffer` ranging from the closest
e5dd7070Spatrick/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
e5dd7070Spatrick/// are whitespace.
e5dd7070Spatrickstatic bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
e5dd7070Spatrick  // Search backwards until we see linefeed or carriage return.
e5dd7070Spatrick  for (unsigned I = P; I != 0; --I) {
e5dd7070Spatrick    char C = Buffer[I - 1];
e5dd7070Spatrick    if (isVerticalWhitespace(C))
e5dd7070Spatrick      return true;
e5dd7070Spatrick    if (!isHorizontalWhitespace(C))
e5dd7070Spatrick      return false;
e5dd7070Spatrick  }
e5dd7070Spatrick  // We hit the beginning of the buffer.
e5dd7070Spatrick  return true;
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrick/// Returns whether `K` is an ordinary comment kind.
e5dd7070Spatrickstatic bool isOrdinaryKind(RawComment::CommentKind K) {
e5dd7070Spatrick  return (K == RawComment::RCK_OrdinaryBCPL) ||
e5dd7070Spatrick         (K == RawComment::RCK_OrdinaryC);
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070SpatrickRawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
e5dd7070Spatrick                       const CommentOptions &CommentOpts, bool Merged) :
e5dd7070Spatrick    Range(SR), RawTextValid(false), BriefTextValid(false),
e5dd7070Spatrick    IsAttached(false), IsTrailingComment(false),
e5dd7070Spatrick    IsAlmostTrailingComment(false) {
e5dd7070Spatrick  // Extract raw comment text, if possible.
e5dd7070Spatrick  if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
e5dd7070Spatrick    Kind = RCK_Invalid;
e5dd7070Spatrick    return;
e5dd7070Spatrick  }
e5dd7070Spatrick
e5dd7070Spatrick  // Guess comment kind.
e5dd7070Spatrick  std::pair<CommentKind, bool> K =
e5dd7070Spatrick      getCommentKind(RawText, CommentOpts.ParseAllComments);
e5dd7070Spatrick
e5dd7070Spatrick  // Guess whether an ordinary comment is trailing.
e5dd7070Spatrick  if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
e5dd7070Spatrick    FileID BeginFileID;
e5dd7070Spatrick    unsigned BeginOffset;
e5dd7070Spatrick    std::tie(BeginFileID, BeginOffset) =
e5dd7070Spatrick        SourceMgr.getDecomposedLoc(Range.getBegin());
e5dd7070Spatrick    if (BeginOffset != 0) {
e5dd7070Spatrick      bool Invalid = false;
e5dd7070Spatrick      const char *Buffer =
e5dd7070Spatrick          SourceMgr.getBufferData(BeginFileID, &Invalid).data();
e5dd7070Spatrick      IsTrailingComment |=
e5dd7070Spatrick          (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
e5dd7070Spatrick    }
e5dd7070Spatrick  }
e5dd7070Spatrick
e5dd7070Spatrick  if (!Merged) {
e5dd7070Spatrick    Kind = K.first;
e5dd7070Spatrick    IsTrailingComment |= K.second;
e5dd7070Spatrick
e5dd7070Spatrick    IsAlmostTrailingComment = RawText.startswith("//<") ||
e5dd7070Spatrick                                 RawText.startswith("/*<");
e5dd7070Spatrick  } else {
e5dd7070Spatrick    Kind = RCK_Merged;
e5dd7070Spatrick    IsTrailingComment =
e5dd7070Spatrick        IsTrailingComment || mergedCommentIsTrailingComment(RawText);
e5dd7070Spatrick  }
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070SpatrickStringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
e5dd7070Spatrick  FileID BeginFileID;
e5dd7070Spatrick  FileID EndFileID;
e5dd7070Spatrick  unsigned BeginOffset;
e5dd7070Spatrick  unsigned EndOffset;
e5dd7070Spatrick
e5dd7070Spatrick  std::tie(BeginFileID, BeginOffset) =
e5dd7070Spatrick      SourceMgr.getDecomposedLoc(Range.getBegin());
e5dd7070Spatrick  std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
e5dd7070Spatrick
e5dd7070Spatrick  const unsigned Length = EndOffset - BeginOffset;
e5dd7070Spatrick  if (Length < 2)
e5dd7070Spatrick    return StringRef();
e5dd7070Spatrick
e5dd7070Spatrick  // The comment can't begin in one file and end in another.
e5dd7070Spatrick  assert(BeginFileID == EndFileID);
e5dd7070Spatrick
e5dd7070Spatrick  bool Invalid = false;
e5dd7070Spatrick  const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
e5dd7070Spatrick                                                    &Invalid).data();
e5dd7070Spatrick  if (Invalid)
e5dd7070Spatrick    return StringRef();
e5dd7070Spatrick
e5dd7070Spatrick  return StringRef(BufferStart + BeginOffset, Length);
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickconst char *RawComment::extractBriefText(const ASTContext &Context) const {
e5dd7070Spatrick  // Lazily initialize RawText using the accessor before using it.
e5dd7070Spatrick  (void)getRawText(Context.getSourceManager());
e5dd7070Spatrick
e5dd7070Spatrick  // Since we will be copying the resulting text, all allocations made during
e5dd7070Spatrick  // parsing are garbage after resulting string is formed.  Thus we can use
e5dd7070Spatrick  // a separate allocator for all temporary stuff.
e5dd7070Spatrick  llvm::BumpPtrAllocator Allocator;
e5dd7070Spatrick
e5dd7070Spatrick  comments::Lexer L(Allocator, Context.getDiagnostics(),
e5dd7070Spatrick                    Context.getCommentCommandTraits(),
e5dd7070Spatrick                    Range.getBegin(),
e5dd7070Spatrick                    RawText.begin(), RawText.end());
e5dd7070Spatrick  comments::BriefParser P(L, Context.getCommentCommandTraits());
e5dd7070Spatrick
e5dd7070Spatrick  const std::string Result = P.Parse();
e5dd7070Spatrick  const unsigned BriefTextLength = Result.size();
e5dd7070Spatrick  char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
e5dd7070Spatrick  memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
e5dd7070Spatrick  BriefText = BriefTextPtr;
e5dd7070Spatrick  BriefTextValid = true;
e5dd7070Spatrick
e5dd7070Spatrick  return BriefTextPtr;
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickcomments::FullComment *RawComment::parse(const ASTContext &Context,
e5dd7070Spatrick                                         const Preprocessor *PP,
e5dd7070Spatrick                                         const Decl *D) const {
e5dd7070Spatrick  // Lazily initialize RawText using the accessor before using it.
e5dd7070Spatrick  (void)getRawText(Context.getSourceManager());
e5dd7070Spatrick
e5dd7070Spatrick  comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
e5dd7070Spatrick                    Context.getCommentCommandTraits(),
e5dd7070Spatrick                    getSourceRange().getBegin(),
e5dd7070Spatrick                    RawText.begin(), RawText.end());
e5dd7070Spatrick  comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
e5dd7070Spatrick                   Context.getDiagnostics(),
e5dd7070Spatrick                   Context.getCommentCommandTraits(),
e5dd7070Spatrick                   PP);
e5dd7070Spatrick  S.setDecl(D);
e5dd7070Spatrick  comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
e5dd7070Spatrick                     Context.getDiagnostics(),
e5dd7070Spatrick                     Context.getCommentCommandTraits());
e5dd7070Spatrick
e5dd7070Spatrick  return P.parseFullComment();
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickstatic bool onlyWhitespaceBetween(SourceManager &SM,
e5dd7070Spatrick                                  SourceLocation Loc1, SourceLocation Loc2,
e5dd7070Spatrick                                  unsigned MaxNewlinesAllowed) {
e5dd7070Spatrick  std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
e5dd7070Spatrick  std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
e5dd7070Spatrick
e5dd7070Spatrick  // Question does not make sense if locations are in different files.
e5dd7070Spatrick  if (Loc1Info.first != Loc2Info.first)
e5dd7070Spatrick    return false;
e5dd7070Spatrick
e5dd7070Spatrick  bool Invalid = false;
e5dd7070Spatrick  const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
e5dd7070Spatrick  if (Invalid)
e5dd7070Spatrick    return false;
e5dd7070Spatrick
e5dd7070Spatrick  unsigned NumNewlines = 0;
e5dd7070Spatrick  assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
e5dd7070Spatrick  // Look for non-whitespace characters and remember any newlines seen.
e5dd7070Spatrick  for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
e5dd7070Spatrick    switch (Buffer[I]) {
e5dd7070Spatrick    default:
e5dd7070Spatrick      return false;
e5dd7070Spatrick    case ' ':
e5dd7070Spatrick    case '\t':
e5dd7070Spatrick    case '\f':
e5dd7070Spatrick    case '\v':
e5dd7070Spatrick      break;
e5dd7070Spatrick    case '\r':
e5dd7070Spatrick    case '\n':
e5dd7070Spatrick      ++NumNewlines;
e5dd7070Spatrick
e5dd7070Spatrick      // Check if we have found more than the maximum allowed number of
e5dd7070Spatrick      // newlines.
e5dd7070Spatrick      if (NumNewlines > MaxNewlinesAllowed)
e5dd7070Spatrick        return false;
e5dd7070Spatrick
e5dd7070Spatrick      // Collapse \r\n and \n\r into a single newline.
e5dd7070Spatrick      if (I + 1 != Loc2Info.second &&
e5dd7070Spatrick          (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
e5dd7070Spatrick          Buffer[I] != Buffer[I + 1])
e5dd7070Spatrick        ++I;
e5dd7070Spatrick      break;
e5dd7070Spatrick    }
e5dd7070Spatrick  }
e5dd7070Spatrick
e5dd7070Spatrick  return true;
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickvoid RawCommentList::addComment(const RawComment &RC,
e5dd7070Spatrick                                const CommentOptions &CommentOpts,
e5dd7070Spatrick                                llvm::BumpPtrAllocator &Allocator) {
e5dd7070Spatrick  if (RC.isInvalid())
e5dd7070Spatrick    return;
e5dd7070Spatrick
e5dd7070Spatrick  // Ordinary comments are not interesting for us.
e5dd7070Spatrick  if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
e5dd7070Spatrick    return;
e5dd7070Spatrick
e5dd7070Spatrick  std::pair<FileID, unsigned> Loc =
e5dd7070Spatrick      SourceMgr.getDecomposedLoc(RC.getBeginLoc());
e5dd7070Spatrick
e5dd7070Spatrick  const FileID CommentFile = Loc.first;
e5dd7070Spatrick  const unsigned CommentOffset = Loc.second;
e5dd7070Spatrick
e5dd7070Spatrick  // If this is the first Doxygen comment, save it (because there isn't
e5dd7070Spatrick  // anything to merge it with).
e5dd7070Spatrick  if (OrderedComments[CommentFile].empty()) {
e5dd7070Spatrick    OrderedComments[CommentFile][CommentOffset] =
e5dd7070Spatrick        new (Allocator) RawComment(RC);
e5dd7070Spatrick    return;
e5dd7070Spatrick  }
e5dd7070Spatrick
e5dd7070Spatrick  const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second;
e5dd7070Spatrick  const RawComment &C2 = RC;
e5dd7070Spatrick
e5dd7070Spatrick  // Merge comments only if there is only whitespace between them.
e5dd7070Spatrick  // Can't merge trailing and non-trailing comments unless the second is
e5dd7070Spatrick  // non-trailing ordinary in the same column, as in the case:
e5dd7070Spatrick  //   int x; // documents x
e5dd7070Spatrick  //          // more text
e5dd7070Spatrick  // versus:
e5dd7070Spatrick  //   int x; // documents x
e5dd7070Spatrick  //   int y; // documents y
e5dd7070Spatrick  // or:
e5dd7070Spatrick  //   int x; // documents x
e5dd7070Spatrick  //   // documents y
e5dd7070Spatrick  //   int y;
e5dd7070Spatrick  // Merge comments if they are on same or consecutive lines.
e5dd7070Spatrick  if ((C1.isTrailingComment() == C2.isTrailingComment() ||
e5dd7070Spatrick       (C1.isTrailingComment() && !C2.isTrailingComment() &&
e5dd7070Spatrick        isOrdinaryKind(C2.getKind()) &&
e5dd7070Spatrick        commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
e5dd7070Spatrick      onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
e5dd7070Spatrick                            /*MaxNewlinesAllowed=*/1)) {
e5dd7070Spatrick    SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
e5dd7070Spatrick    *OrderedComments[CommentFile].rbegin()->second =
e5dd7070Spatrick        RawComment(SourceMgr, MergedRange, CommentOpts, true);
e5dd7070Spatrick  } else {
e5dd7070Spatrick    OrderedComments[CommentFile][CommentOffset] =
e5dd7070Spatrick        new (Allocator) RawComment(RC);
e5dd7070Spatrick  }
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickconst std::map<unsigned, RawComment *> *
e5dd7070SpatrickRawCommentList::getCommentsInFile(FileID File) const {
e5dd7070Spatrick  auto CommentsInFile = OrderedComments.find(File);
e5dd7070Spatrick  if (CommentsInFile == OrderedComments.end())
e5dd7070Spatrick    return nullptr;
e5dd7070Spatrick
e5dd7070Spatrick  return &CommentsInFile->second;
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickbool RawCommentList::empty() const { return OrderedComments.empty(); }
e5dd7070Spatrick
e5dd7070Spatrickunsigned RawCommentList::getCommentBeginLine(RawComment *C, FileID File,
e5dd7070Spatrick                                             unsigned Offset) const {
e5dd7070Spatrick  auto Cached = CommentBeginLine.find(C);
e5dd7070Spatrick  if (Cached != CommentBeginLine.end())
e5dd7070Spatrick    return Cached->second;
e5dd7070Spatrick  const unsigned Line = SourceMgr.getLineNumber(File, Offset);
e5dd7070Spatrick  CommentBeginLine[C] = Line;
e5dd7070Spatrick  return Line;
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickunsigned RawCommentList::getCommentEndOffset(RawComment *C) const {
e5dd7070Spatrick  auto Cached = CommentEndOffset.find(C);
e5dd7070Spatrick  if (Cached != CommentEndOffset.end())
e5dd7070Spatrick    return Cached->second;
e5dd7070Spatrick  const unsigned Offset =
e5dd7070Spatrick      SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second;
e5dd7070Spatrick  CommentEndOffset[C] = Offset;
e5dd7070Spatrick  return Offset;
e5dd7070Spatrick}
e5dd7070Spatrick
e5dd7070Spatrickstd::string RawComment::getFormattedText(const SourceManager &SourceMgr,
e5dd7070Spatrick                                         DiagnosticsEngine &Diags) const {
e5dd7070Spatrick  llvm::StringRef CommentText = getRawText(SourceMgr);
e5dd7070Spatrick  if (CommentText.empty())
e5dd7070Spatrick    return "";
e5dd7070Spatrick
*12c85518Srobert  std::string Result;
*12c85518Srobert  for (const RawComment::CommentLine &Line :
*12c85518Srobert       getFormattedLines(SourceMgr, Diags))
*12c85518Srobert    Result += Line.Text + "\n";
*12c85518Srobert
*12c85518Srobert  auto LastChar = Result.find_last_not_of('\n');
*12c85518Srobert  Result.erase(LastChar + 1, Result.size());
*12c85518Srobert
*12c85518Srobert  return Result;
*12c85518Srobert}
*12c85518Srobert
*12c85518Srobertstd::vector<RawComment::CommentLine>
*12c85518SrobertRawComment::getFormattedLines(const SourceManager &SourceMgr,
*12c85518Srobert                              DiagnosticsEngine &Diags) const {
*12c85518Srobert  llvm::StringRef CommentText = getRawText(SourceMgr);
*12c85518Srobert  if (CommentText.empty())
*12c85518Srobert    return {};
*12c85518Srobert
e5dd7070Spatrick  llvm::BumpPtrAllocator Allocator;
e5dd7070Spatrick  // We do not parse any commands, so CommentOptions are ignored by
e5dd7070Spatrick  // comments::Lexer. Therefore, we just use default-constructed options.
e5dd7070Spatrick  CommentOptions DefOpts;
e5dd7070Spatrick  comments::CommandTraits EmptyTraits(Allocator, DefOpts);
e5dd7070Spatrick  comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
e5dd7070Spatrick                    CommentText.begin(), CommentText.end(),
e5dd7070Spatrick                    /*ParseCommands=*/false);
e5dd7070Spatrick
*12c85518Srobert  std::vector<RawComment::CommentLine> Result;
e5dd7070Spatrick  // A column number of the first non-whitespace token in the comment text.
e5dd7070Spatrick  // We skip whitespace up to this column, but keep the whitespace after this
e5dd7070Spatrick  // column. IndentColumn is calculated when lexing the first line and reused
e5dd7070Spatrick  // for the rest of lines.
e5dd7070Spatrick  unsigned IndentColumn = 0;
e5dd7070Spatrick
*12c85518Srobert  // Record the line number of the last processed comment line.
*12c85518Srobert  // For block-style comments, an extra newline token will be produced after
*12c85518Srobert  // the end-comment marker, e.g.:
*12c85518Srobert  //   /** This is a multi-line comment block.
*12c85518Srobert  //       The lexer will produce two newline tokens here > */
*12c85518Srobert  // previousLine will record the line number when we previously saw a newline
*12c85518Srobert  // token and recorded a comment line. If we see another newline token on the
*12c85518Srobert  // same line, don't record anything in between.
*12c85518Srobert  unsigned PreviousLine = 0;
*12c85518Srobert
e5dd7070Spatrick  // Processes one line of the comment and adds it to the result.
e5dd7070Spatrick  // Handles skipping the indent at the start of the line.
e5dd7070Spatrick  // Returns false when eof is reached and true otherwise.
e5dd7070Spatrick  auto LexLine = [&](bool IsFirstLine) -> bool {
e5dd7070Spatrick    comments::Token Tok;
e5dd7070Spatrick    // Lex the first token on the line. We handle it separately, because we to
e5dd7070Spatrick    // fix up its indentation.
e5dd7070Spatrick    L.lex(Tok);
e5dd7070Spatrick    if (Tok.is(comments::tok::eof))
e5dd7070Spatrick      return false;
e5dd7070Spatrick    if (Tok.is(comments::tok::newline)) {
*12c85518Srobert      PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation());
*12c85518Srobert      if (Loc.getLine() != PreviousLine) {
*12c85518Srobert        Result.emplace_back("", Loc, Loc);
*12c85518Srobert        PreviousLine = Loc.getLine();
*12c85518Srobert      }
e5dd7070Spatrick      return true;
e5dd7070Spatrick    }
*12c85518Srobert    SmallString<124> Line;
e5dd7070Spatrick    llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
e5dd7070Spatrick    bool LocInvalid = false;
e5dd7070Spatrick    unsigned TokColumn =
e5dd7070Spatrick        SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
e5dd7070Spatrick    assert(!LocInvalid && "getFormattedText for invalid location");
e5dd7070Spatrick
e5dd7070Spatrick    // Amount of leading whitespace in TokText.
e5dd7070Spatrick    size_t WhitespaceLen = TokText.find_first_not_of(" \t");
e5dd7070Spatrick    if (WhitespaceLen == StringRef::npos)
e5dd7070Spatrick      WhitespaceLen = TokText.size();
e5dd7070Spatrick    // Remember the amount of whitespace we skipped in the first line to remove
e5dd7070Spatrick    // indent up to that column in the following lines.
e5dd7070Spatrick    if (IsFirstLine)
e5dd7070Spatrick      IndentColumn = TokColumn + WhitespaceLen;
e5dd7070Spatrick
e5dd7070Spatrick    // Amount of leading whitespace we actually want to skip.
e5dd7070Spatrick    // For the first line we skip all the whitespace.
e5dd7070Spatrick    // For the rest of the lines, we skip whitespace up to IndentColumn.
e5dd7070Spatrick    unsigned SkipLen =
e5dd7070Spatrick        IsFirstLine
e5dd7070Spatrick            ? WhitespaceLen
e5dd7070Spatrick            : std::min<size_t>(
e5dd7070Spatrick                  WhitespaceLen,
e5dd7070Spatrick                  std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
e5dd7070Spatrick    llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
*12c85518Srobert    Line += Trimmed;
*12c85518Srobert    // Get the beginning location of the adjusted comment line.
*12c85518Srobert    PresumedLoc Begin =
*12c85518Srobert        SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen));
*12c85518Srobert
e5dd7070Spatrick    // Lex all tokens in the rest of the line.
e5dd7070Spatrick    for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
e5dd7070Spatrick      if (Tok.is(comments::tok::newline)) {
*12c85518Srobert        // Get the ending location of the comment line.
*12c85518Srobert        PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
*12c85518Srobert        if (End.getLine() != PreviousLine) {
*12c85518Srobert          Result.emplace_back(Line, Begin, End);
*12c85518Srobert          PreviousLine = End.getLine();
*12c85518Srobert        }
e5dd7070Spatrick        return true;
e5dd7070Spatrick      }
*12c85518Srobert      Line += L.getSpelling(Tok, SourceMgr);
e5dd7070Spatrick    }
*12c85518Srobert    PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
*12c85518Srobert    Result.emplace_back(Line, Begin, End);
e5dd7070Spatrick    // We've reached the end of file token.
e5dd7070Spatrick    return false;
e5dd7070Spatrick  };
e5dd7070Spatrick
e5dd7070Spatrick  // Process first line separately to remember indent for the following lines.
*12c85518Srobert  if (!LexLine(/*IsFirstLine=*/true))
e5dd7070Spatrick    return Result;
e5dd7070Spatrick  // Process the rest of the lines.
e5dd7070Spatrick  while (LexLine(/*IsFirstLine=*/false))
e5dd7070Spatrick    ;
e5dd7070Spatrick  return Result;
e5dd7070Spatrick}