xref: /openbsd-src/gnu/llvm/clang/lib/AST/RawCommentList.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1e5dd7070Spatrick //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick 
9e5dd7070Spatrick #include "clang/AST/RawCommentList.h"
10e5dd7070Spatrick #include "clang/AST/ASTContext.h"
11e5dd7070Spatrick #include "clang/AST/Comment.h"
12e5dd7070Spatrick #include "clang/AST/CommentBriefParser.h"
13e5dd7070Spatrick #include "clang/AST/CommentCommandTraits.h"
14e5dd7070Spatrick #include "clang/AST/CommentLexer.h"
15e5dd7070Spatrick #include "clang/AST/CommentParser.h"
16e5dd7070Spatrick #include "clang/AST/CommentSema.h"
17e5dd7070Spatrick #include "clang/Basic/CharInfo.h"
18e5dd7070Spatrick #include "llvm/ADT/STLExtras.h"
19*12c85518Srobert #include "llvm/ADT/StringExtras.h"
20ec727ea7Spatrick #include "llvm/Support/Allocator.h"
21e5dd7070Spatrick 
22e5dd7070Spatrick using namespace clang;
23e5dd7070Spatrick 
24e5dd7070Spatrick namespace {
25e5dd7070Spatrick /// Get comment kind and bool describing if it is a trailing comment.
getCommentKind(StringRef Comment,bool ParseAllComments)26e5dd7070Spatrick std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
27e5dd7070Spatrick                                                         bool ParseAllComments) {
28e5dd7070Spatrick   const size_t MinCommentLength = ParseAllComments ? 2 : 3;
29e5dd7070Spatrick   if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
30e5dd7070Spatrick     return std::make_pair(RawComment::RCK_Invalid, false);
31e5dd7070Spatrick 
32e5dd7070Spatrick   RawComment::CommentKind K;
33e5dd7070Spatrick   if (Comment[1] == '/') {
34e5dd7070Spatrick     if (Comment.size() < 3)
35e5dd7070Spatrick       return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
36e5dd7070Spatrick 
37e5dd7070Spatrick     if (Comment[2] == '/')
38e5dd7070Spatrick       K = RawComment::RCK_BCPLSlash;
39e5dd7070Spatrick     else if (Comment[2] == '!')
40e5dd7070Spatrick       K = RawComment::RCK_BCPLExcl;
41e5dd7070Spatrick     else
42e5dd7070Spatrick       return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
43e5dd7070Spatrick   } else {
44e5dd7070Spatrick     assert(Comment.size() >= 4);
45e5dd7070Spatrick 
46e5dd7070Spatrick     // Comment lexer does not understand escapes in comment markers, so pretend
47e5dd7070Spatrick     // that this is not a comment.
48e5dd7070Spatrick     if (Comment[1] != '*' ||
49e5dd7070Spatrick         Comment[Comment.size() - 2] != '*' ||
50e5dd7070Spatrick         Comment[Comment.size() - 1] != '/')
51e5dd7070Spatrick       return std::make_pair(RawComment::RCK_Invalid, false);
52e5dd7070Spatrick 
53e5dd7070Spatrick     if (Comment[2] == '*')
54e5dd7070Spatrick       K = RawComment::RCK_JavaDoc;
55e5dd7070Spatrick     else if (Comment[2] == '!')
56e5dd7070Spatrick       K = RawComment::RCK_Qt;
57e5dd7070Spatrick     else
58e5dd7070Spatrick       return std::make_pair(RawComment::RCK_OrdinaryC, false);
59e5dd7070Spatrick   }
60e5dd7070Spatrick   const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
61e5dd7070Spatrick   return std::make_pair(K, TrailingComment);
62e5dd7070Spatrick }
63e5dd7070Spatrick 
mergedCommentIsTrailingComment(StringRef Comment)64e5dd7070Spatrick bool mergedCommentIsTrailingComment(StringRef Comment) {
65e5dd7070Spatrick   return (Comment.size() > 3) && (Comment[3] == '<');
66e5dd7070Spatrick }
67e5dd7070Spatrick 
68e5dd7070Spatrick /// Returns true if R1 and R2 both have valid locations that start on the same
69e5dd7070Spatrick /// column.
commentsStartOnSameColumn(const SourceManager & SM,const RawComment & R1,const RawComment & R2)70e5dd7070Spatrick bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
71e5dd7070Spatrick                                const RawComment &R2) {
72e5dd7070Spatrick   SourceLocation L1 = R1.getBeginLoc();
73e5dd7070Spatrick   SourceLocation L2 = R2.getBeginLoc();
74e5dd7070Spatrick   bool Invalid = false;
75e5dd7070Spatrick   unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
76e5dd7070Spatrick   if (!Invalid) {
77e5dd7070Spatrick     unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
78e5dd7070Spatrick     return !Invalid && (C1 == C2);
79e5dd7070Spatrick   }
80e5dd7070Spatrick   return false;
81e5dd7070Spatrick }
82e5dd7070Spatrick } // unnamed namespace
83e5dd7070Spatrick 
84e5dd7070Spatrick /// Determines whether there is only whitespace in `Buffer` between `P`
85e5dd7070Spatrick /// and the previous line.
86e5dd7070Spatrick /// \param Buffer The buffer to search in.
87e5dd7070Spatrick /// \param P The offset from the beginning of `Buffer` to start from.
88e5dd7070Spatrick /// \return true if all of the characters in `Buffer` ranging from the closest
89e5dd7070Spatrick /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
90e5dd7070Spatrick /// are whitespace.
onlyWhitespaceOnLineBefore(const char * Buffer,unsigned P)91e5dd7070Spatrick static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
92e5dd7070Spatrick   // Search backwards until we see linefeed or carriage return.
93e5dd7070Spatrick   for (unsigned I = P; I != 0; --I) {
94e5dd7070Spatrick     char C = Buffer[I - 1];
95e5dd7070Spatrick     if (isVerticalWhitespace(C))
96e5dd7070Spatrick       return true;
97e5dd7070Spatrick     if (!isHorizontalWhitespace(C))
98e5dd7070Spatrick       return false;
99e5dd7070Spatrick   }
100e5dd7070Spatrick   // We hit the beginning of the buffer.
101e5dd7070Spatrick   return true;
102e5dd7070Spatrick }
103e5dd7070Spatrick 
104e5dd7070Spatrick /// Returns whether `K` is an ordinary comment kind.
isOrdinaryKind(RawComment::CommentKind K)105e5dd7070Spatrick static bool isOrdinaryKind(RawComment::CommentKind K) {
106e5dd7070Spatrick   return (K == RawComment::RCK_OrdinaryBCPL) ||
107e5dd7070Spatrick          (K == RawComment::RCK_OrdinaryC);
108e5dd7070Spatrick }
109e5dd7070Spatrick 
RawComment(const SourceManager & SourceMgr,SourceRange SR,const CommentOptions & CommentOpts,bool Merged)110e5dd7070Spatrick RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
111e5dd7070Spatrick                        const CommentOptions &CommentOpts, bool Merged) :
112e5dd7070Spatrick     Range(SR), RawTextValid(false), BriefTextValid(false),
113e5dd7070Spatrick     IsAttached(false), IsTrailingComment(false),
114e5dd7070Spatrick     IsAlmostTrailingComment(false) {
115e5dd7070Spatrick   // Extract raw comment text, if possible.
116e5dd7070Spatrick   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
117e5dd7070Spatrick     Kind = RCK_Invalid;
118e5dd7070Spatrick     return;
119e5dd7070Spatrick   }
120e5dd7070Spatrick 
121e5dd7070Spatrick   // Guess comment kind.
122e5dd7070Spatrick   std::pair<CommentKind, bool> K =
123e5dd7070Spatrick       getCommentKind(RawText, CommentOpts.ParseAllComments);
124e5dd7070Spatrick 
125e5dd7070Spatrick   // Guess whether an ordinary comment is trailing.
126e5dd7070Spatrick   if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
127e5dd7070Spatrick     FileID BeginFileID;
128e5dd7070Spatrick     unsigned BeginOffset;
129e5dd7070Spatrick     std::tie(BeginFileID, BeginOffset) =
130e5dd7070Spatrick         SourceMgr.getDecomposedLoc(Range.getBegin());
131e5dd7070Spatrick     if (BeginOffset != 0) {
132e5dd7070Spatrick       bool Invalid = false;
133e5dd7070Spatrick       const char *Buffer =
134e5dd7070Spatrick           SourceMgr.getBufferData(BeginFileID, &Invalid).data();
135e5dd7070Spatrick       IsTrailingComment |=
136e5dd7070Spatrick           (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
137e5dd7070Spatrick     }
138e5dd7070Spatrick   }
139e5dd7070Spatrick 
140e5dd7070Spatrick   if (!Merged) {
141e5dd7070Spatrick     Kind = K.first;
142e5dd7070Spatrick     IsTrailingComment |= K.second;
143e5dd7070Spatrick 
144e5dd7070Spatrick     IsAlmostTrailingComment = RawText.startswith("//<") ||
145e5dd7070Spatrick                                  RawText.startswith("/*<");
146e5dd7070Spatrick   } else {
147e5dd7070Spatrick     Kind = RCK_Merged;
148e5dd7070Spatrick     IsTrailingComment =
149e5dd7070Spatrick         IsTrailingComment || mergedCommentIsTrailingComment(RawText);
150e5dd7070Spatrick   }
151e5dd7070Spatrick }
152e5dd7070Spatrick 
getRawTextSlow(const SourceManager & SourceMgr) const153e5dd7070Spatrick StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
154e5dd7070Spatrick   FileID BeginFileID;
155e5dd7070Spatrick   FileID EndFileID;
156e5dd7070Spatrick   unsigned BeginOffset;
157e5dd7070Spatrick   unsigned EndOffset;
158e5dd7070Spatrick 
159e5dd7070Spatrick   std::tie(BeginFileID, BeginOffset) =
160e5dd7070Spatrick       SourceMgr.getDecomposedLoc(Range.getBegin());
161e5dd7070Spatrick   std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
162e5dd7070Spatrick 
163e5dd7070Spatrick   const unsigned Length = EndOffset - BeginOffset;
164e5dd7070Spatrick   if (Length < 2)
165e5dd7070Spatrick     return StringRef();
166e5dd7070Spatrick 
167e5dd7070Spatrick   // The comment can't begin in one file and end in another.
168e5dd7070Spatrick   assert(BeginFileID == EndFileID);
169e5dd7070Spatrick 
170e5dd7070Spatrick   bool Invalid = false;
171e5dd7070Spatrick   const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
172e5dd7070Spatrick                                                     &Invalid).data();
173e5dd7070Spatrick   if (Invalid)
174e5dd7070Spatrick     return StringRef();
175e5dd7070Spatrick 
176e5dd7070Spatrick   return StringRef(BufferStart + BeginOffset, Length);
177e5dd7070Spatrick }
178e5dd7070Spatrick 
extractBriefText(const ASTContext & Context) const179e5dd7070Spatrick const char *RawComment::extractBriefText(const ASTContext &Context) const {
180e5dd7070Spatrick   // Lazily initialize RawText using the accessor before using it.
181e5dd7070Spatrick   (void)getRawText(Context.getSourceManager());
182e5dd7070Spatrick 
183e5dd7070Spatrick   // Since we will be copying the resulting text, all allocations made during
184e5dd7070Spatrick   // parsing are garbage after resulting string is formed.  Thus we can use
185e5dd7070Spatrick   // a separate allocator for all temporary stuff.
186e5dd7070Spatrick   llvm::BumpPtrAllocator Allocator;
187e5dd7070Spatrick 
188e5dd7070Spatrick   comments::Lexer L(Allocator, Context.getDiagnostics(),
189e5dd7070Spatrick                     Context.getCommentCommandTraits(),
190e5dd7070Spatrick                     Range.getBegin(),
191e5dd7070Spatrick                     RawText.begin(), RawText.end());
192e5dd7070Spatrick   comments::BriefParser P(L, Context.getCommentCommandTraits());
193e5dd7070Spatrick 
194e5dd7070Spatrick   const std::string Result = P.Parse();
195e5dd7070Spatrick   const unsigned BriefTextLength = Result.size();
196e5dd7070Spatrick   char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
197e5dd7070Spatrick   memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
198e5dd7070Spatrick   BriefText = BriefTextPtr;
199e5dd7070Spatrick   BriefTextValid = true;
200e5dd7070Spatrick 
201e5dd7070Spatrick   return BriefTextPtr;
202e5dd7070Spatrick }
203e5dd7070Spatrick 
parse(const ASTContext & Context,const Preprocessor * PP,const Decl * D) const204e5dd7070Spatrick comments::FullComment *RawComment::parse(const ASTContext &Context,
205e5dd7070Spatrick                                          const Preprocessor *PP,
206e5dd7070Spatrick                                          const Decl *D) const {
207e5dd7070Spatrick   // Lazily initialize RawText using the accessor before using it.
208e5dd7070Spatrick   (void)getRawText(Context.getSourceManager());
209e5dd7070Spatrick 
210e5dd7070Spatrick   comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
211e5dd7070Spatrick                     Context.getCommentCommandTraits(),
212e5dd7070Spatrick                     getSourceRange().getBegin(),
213e5dd7070Spatrick                     RawText.begin(), RawText.end());
214e5dd7070Spatrick   comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
215e5dd7070Spatrick                    Context.getDiagnostics(),
216e5dd7070Spatrick                    Context.getCommentCommandTraits(),
217e5dd7070Spatrick                    PP);
218e5dd7070Spatrick   S.setDecl(D);
219e5dd7070Spatrick   comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
220e5dd7070Spatrick                      Context.getDiagnostics(),
221e5dd7070Spatrick                      Context.getCommentCommandTraits());
222e5dd7070Spatrick 
223e5dd7070Spatrick   return P.parseFullComment();
224e5dd7070Spatrick }
225e5dd7070Spatrick 
onlyWhitespaceBetween(SourceManager & SM,SourceLocation Loc1,SourceLocation Loc2,unsigned MaxNewlinesAllowed)226e5dd7070Spatrick static bool onlyWhitespaceBetween(SourceManager &SM,
227e5dd7070Spatrick                                   SourceLocation Loc1, SourceLocation Loc2,
228e5dd7070Spatrick                                   unsigned MaxNewlinesAllowed) {
229e5dd7070Spatrick   std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
230e5dd7070Spatrick   std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
231e5dd7070Spatrick 
232e5dd7070Spatrick   // Question does not make sense if locations are in different files.
233e5dd7070Spatrick   if (Loc1Info.first != Loc2Info.first)
234e5dd7070Spatrick     return false;
235e5dd7070Spatrick 
236e5dd7070Spatrick   bool Invalid = false;
237e5dd7070Spatrick   const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
238e5dd7070Spatrick   if (Invalid)
239e5dd7070Spatrick     return false;
240e5dd7070Spatrick 
241e5dd7070Spatrick   unsigned NumNewlines = 0;
242e5dd7070Spatrick   assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
243e5dd7070Spatrick   // Look for non-whitespace characters and remember any newlines seen.
244e5dd7070Spatrick   for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
245e5dd7070Spatrick     switch (Buffer[I]) {
246e5dd7070Spatrick     default:
247e5dd7070Spatrick       return false;
248e5dd7070Spatrick     case ' ':
249e5dd7070Spatrick     case '\t':
250e5dd7070Spatrick     case '\f':
251e5dd7070Spatrick     case '\v':
252e5dd7070Spatrick       break;
253e5dd7070Spatrick     case '\r':
254e5dd7070Spatrick     case '\n':
255e5dd7070Spatrick       ++NumNewlines;
256e5dd7070Spatrick 
257e5dd7070Spatrick       // Check if we have found more than the maximum allowed number of
258e5dd7070Spatrick       // newlines.
259e5dd7070Spatrick       if (NumNewlines > MaxNewlinesAllowed)
260e5dd7070Spatrick         return false;
261e5dd7070Spatrick 
262e5dd7070Spatrick       // Collapse \r\n and \n\r into a single newline.
263e5dd7070Spatrick       if (I + 1 != Loc2Info.second &&
264e5dd7070Spatrick           (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
265e5dd7070Spatrick           Buffer[I] != Buffer[I + 1])
266e5dd7070Spatrick         ++I;
267e5dd7070Spatrick       break;
268e5dd7070Spatrick     }
269e5dd7070Spatrick   }
270e5dd7070Spatrick 
271e5dd7070Spatrick   return true;
272e5dd7070Spatrick }
273e5dd7070Spatrick 
addComment(const RawComment & RC,const CommentOptions & CommentOpts,llvm::BumpPtrAllocator & Allocator)274e5dd7070Spatrick void RawCommentList::addComment(const RawComment &RC,
275e5dd7070Spatrick                                 const CommentOptions &CommentOpts,
276e5dd7070Spatrick                                 llvm::BumpPtrAllocator &Allocator) {
277e5dd7070Spatrick   if (RC.isInvalid())
278e5dd7070Spatrick     return;
279e5dd7070Spatrick 
280e5dd7070Spatrick   // Ordinary comments are not interesting for us.
281e5dd7070Spatrick   if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
282e5dd7070Spatrick     return;
283e5dd7070Spatrick 
284e5dd7070Spatrick   std::pair<FileID, unsigned> Loc =
285e5dd7070Spatrick       SourceMgr.getDecomposedLoc(RC.getBeginLoc());
286e5dd7070Spatrick 
287e5dd7070Spatrick   const FileID CommentFile = Loc.first;
288e5dd7070Spatrick   const unsigned CommentOffset = Loc.second;
289e5dd7070Spatrick 
290e5dd7070Spatrick   // If this is the first Doxygen comment, save it (because there isn't
291e5dd7070Spatrick   // anything to merge it with).
292e5dd7070Spatrick   if (OrderedComments[CommentFile].empty()) {
293e5dd7070Spatrick     OrderedComments[CommentFile][CommentOffset] =
294e5dd7070Spatrick         new (Allocator) RawComment(RC);
295e5dd7070Spatrick     return;
296e5dd7070Spatrick   }
297e5dd7070Spatrick 
298e5dd7070Spatrick   const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second;
299e5dd7070Spatrick   const RawComment &C2 = RC;
300e5dd7070Spatrick 
301e5dd7070Spatrick   // Merge comments only if there is only whitespace between them.
302e5dd7070Spatrick   // Can't merge trailing and non-trailing comments unless the second is
303e5dd7070Spatrick   // non-trailing ordinary in the same column, as in the case:
304e5dd7070Spatrick   //   int x; // documents x
305e5dd7070Spatrick   //          // more text
306e5dd7070Spatrick   // versus:
307e5dd7070Spatrick   //   int x; // documents x
308e5dd7070Spatrick   //   int y; // documents y
309e5dd7070Spatrick   // or:
310e5dd7070Spatrick   //   int x; // documents x
311e5dd7070Spatrick   //   // documents y
312e5dd7070Spatrick   //   int y;
313e5dd7070Spatrick   // Merge comments if they are on same or consecutive lines.
314e5dd7070Spatrick   if ((C1.isTrailingComment() == C2.isTrailingComment() ||
315e5dd7070Spatrick        (C1.isTrailingComment() && !C2.isTrailingComment() &&
316e5dd7070Spatrick         isOrdinaryKind(C2.getKind()) &&
317e5dd7070Spatrick         commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
318e5dd7070Spatrick       onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
319e5dd7070Spatrick                             /*MaxNewlinesAllowed=*/1)) {
320e5dd7070Spatrick     SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
321e5dd7070Spatrick     *OrderedComments[CommentFile].rbegin()->second =
322e5dd7070Spatrick         RawComment(SourceMgr, MergedRange, CommentOpts, true);
323e5dd7070Spatrick   } else {
324e5dd7070Spatrick     OrderedComments[CommentFile][CommentOffset] =
325e5dd7070Spatrick         new (Allocator) RawComment(RC);
326e5dd7070Spatrick   }
327e5dd7070Spatrick }
328e5dd7070Spatrick 
329e5dd7070Spatrick const std::map<unsigned, RawComment *> *
getCommentsInFile(FileID File) const330e5dd7070Spatrick RawCommentList::getCommentsInFile(FileID File) const {
331e5dd7070Spatrick   auto CommentsInFile = OrderedComments.find(File);
332e5dd7070Spatrick   if (CommentsInFile == OrderedComments.end())
333e5dd7070Spatrick     return nullptr;
334e5dd7070Spatrick 
335e5dd7070Spatrick   return &CommentsInFile->second;
336e5dd7070Spatrick }
337e5dd7070Spatrick 
empty() const338e5dd7070Spatrick bool RawCommentList::empty() const { return OrderedComments.empty(); }
339e5dd7070Spatrick 
getCommentBeginLine(RawComment * C,FileID File,unsigned Offset) const340e5dd7070Spatrick unsigned RawCommentList::getCommentBeginLine(RawComment *C, FileID File,
341e5dd7070Spatrick                                              unsigned Offset) const {
342e5dd7070Spatrick   auto Cached = CommentBeginLine.find(C);
343e5dd7070Spatrick   if (Cached != CommentBeginLine.end())
344e5dd7070Spatrick     return Cached->second;
345e5dd7070Spatrick   const unsigned Line = SourceMgr.getLineNumber(File, Offset);
346e5dd7070Spatrick   CommentBeginLine[C] = Line;
347e5dd7070Spatrick   return Line;
348e5dd7070Spatrick }
349e5dd7070Spatrick 
getCommentEndOffset(RawComment * C) const350e5dd7070Spatrick unsigned RawCommentList::getCommentEndOffset(RawComment *C) const {
351e5dd7070Spatrick   auto Cached = CommentEndOffset.find(C);
352e5dd7070Spatrick   if (Cached != CommentEndOffset.end())
353e5dd7070Spatrick     return Cached->second;
354e5dd7070Spatrick   const unsigned Offset =
355e5dd7070Spatrick       SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second;
356e5dd7070Spatrick   CommentEndOffset[C] = Offset;
357e5dd7070Spatrick   return Offset;
358e5dd7070Spatrick }
359e5dd7070Spatrick 
getFormattedText(const SourceManager & SourceMgr,DiagnosticsEngine & Diags) const360e5dd7070Spatrick std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
361e5dd7070Spatrick                                          DiagnosticsEngine &Diags) const {
362e5dd7070Spatrick   llvm::StringRef CommentText = getRawText(SourceMgr);
363e5dd7070Spatrick   if (CommentText.empty())
364e5dd7070Spatrick     return "";
365e5dd7070Spatrick 
366*12c85518Srobert   std::string Result;
367*12c85518Srobert   for (const RawComment::CommentLine &Line :
368*12c85518Srobert        getFormattedLines(SourceMgr, Diags))
369*12c85518Srobert     Result += Line.Text + "\n";
370*12c85518Srobert 
371*12c85518Srobert   auto LastChar = Result.find_last_not_of('\n');
372*12c85518Srobert   Result.erase(LastChar + 1, Result.size());
373*12c85518Srobert 
374*12c85518Srobert   return Result;
375*12c85518Srobert }
376*12c85518Srobert 
377*12c85518Srobert std::vector<RawComment::CommentLine>
getFormattedLines(const SourceManager & SourceMgr,DiagnosticsEngine & Diags) const378*12c85518Srobert RawComment::getFormattedLines(const SourceManager &SourceMgr,
379*12c85518Srobert                               DiagnosticsEngine &Diags) const {
380*12c85518Srobert   llvm::StringRef CommentText = getRawText(SourceMgr);
381*12c85518Srobert   if (CommentText.empty())
382*12c85518Srobert     return {};
383*12c85518Srobert 
384e5dd7070Spatrick   llvm::BumpPtrAllocator Allocator;
385e5dd7070Spatrick   // We do not parse any commands, so CommentOptions are ignored by
386e5dd7070Spatrick   // comments::Lexer. Therefore, we just use default-constructed options.
387e5dd7070Spatrick   CommentOptions DefOpts;
388e5dd7070Spatrick   comments::CommandTraits EmptyTraits(Allocator, DefOpts);
389e5dd7070Spatrick   comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
390e5dd7070Spatrick                     CommentText.begin(), CommentText.end(),
391e5dd7070Spatrick                     /*ParseCommands=*/false);
392e5dd7070Spatrick 
393*12c85518Srobert   std::vector<RawComment::CommentLine> Result;
394e5dd7070Spatrick   // A column number of the first non-whitespace token in the comment text.
395e5dd7070Spatrick   // We skip whitespace up to this column, but keep the whitespace after this
396e5dd7070Spatrick   // column. IndentColumn is calculated when lexing the first line and reused
397e5dd7070Spatrick   // for the rest of lines.
398e5dd7070Spatrick   unsigned IndentColumn = 0;
399e5dd7070Spatrick 
400*12c85518Srobert   // Record the line number of the last processed comment line.
401*12c85518Srobert   // For block-style comments, an extra newline token will be produced after
402*12c85518Srobert   // the end-comment marker, e.g.:
403*12c85518Srobert   //   /** This is a multi-line comment block.
404*12c85518Srobert   //       The lexer will produce two newline tokens here > */
405*12c85518Srobert   // previousLine will record the line number when we previously saw a newline
406*12c85518Srobert   // token and recorded a comment line. If we see another newline token on the
407*12c85518Srobert   // same line, don't record anything in between.
408*12c85518Srobert   unsigned PreviousLine = 0;
409*12c85518Srobert 
410e5dd7070Spatrick   // Processes one line of the comment and adds it to the result.
411e5dd7070Spatrick   // Handles skipping the indent at the start of the line.
412e5dd7070Spatrick   // Returns false when eof is reached and true otherwise.
413e5dd7070Spatrick   auto LexLine = [&](bool IsFirstLine) -> bool {
414e5dd7070Spatrick     comments::Token Tok;
415e5dd7070Spatrick     // Lex the first token on the line. We handle it separately, because we to
416e5dd7070Spatrick     // fix up its indentation.
417e5dd7070Spatrick     L.lex(Tok);
418e5dd7070Spatrick     if (Tok.is(comments::tok::eof))
419e5dd7070Spatrick       return false;
420e5dd7070Spatrick     if (Tok.is(comments::tok::newline)) {
421*12c85518Srobert       PresumedLoc Loc = SourceMgr.getPresumedLoc(Tok.getLocation());
422*12c85518Srobert       if (Loc.getLine() != PreviousLine) {
423*12c85518Srobert         Result.emplace_back("", Loc, Loc);
424*12c85518Srobert         PreviousLine = Loc.getLine();
425*12c85518Srobert       }
426e5dd7070Spatrick       return true;
427e5dd7070Spatrick     }
428*12c85518Srobert     SmallString<124> Line;
429e5dd7070Spatrick     llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
430e5dd7070Spatrick     bool LocInvalid = false;
431e5dd7070Spatrick     unsigned TokColumn =
432e5dd7070Spatrick         SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
433e5dd7070Spatrick     assert(!LocInvalid && "getFormattedText for invalid location");
434e5dd7070Spatrick 
435e5dd7070Spatrick     // Amount of leading whitespace in TokText.
436e5dd7070Spatrick     size_t WhitespaceLen = TokText.find_first_not_of(" \t");
437e5dd7070Spatrick     if (WhitespaceLen == StringRef::npos)
438e5dd7070Spatrick       WhitespaceLen = TokText.size();
439e5dd7070Spatrick     // Remember the amount of whitespace we skipped in the first line to remove
440e5dd7070Spatrick     // indent up to that column in the following lines.
441e5dd7070Spatrick     if (IsFirstLine)
442e5dd7070Spatrick       IndentColumn = TokColumn + WhitespaceLen;
443e5dd7070Spatrick 
444e5dd7070Spatrick     // Amount of leading whitespace we actually want to skip.
445e5dd7070Spatrick     // For the first line we skip all the whitespace.
446e5dd7070Spatrick     // For the rest of the lines, we skip whitespace up to IndentColumn.
447e5dd7070Spatrick     unsigned SkipLen =
448e5dd7070Spatrick         IsFirstLine
449e5dd7070Spatrick             ? WhitespaceLen
450e5dd7070Spatrick             : std::min<size_t>(
451e5dd7070Spatrick                   WhitespaceLen,
452e5dd7070Spatrick                   std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
453e5dd7070Spatrick     llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
454*12c85518Srobert     Line += Trimmed;
455*12c85518Srobert     // Get the beginning location of the adjusted comment line.
456*12c85518Srobert     PresumedLoc Begin =
457*12c85518Srobert         SourceMgr.getPresumedLoc(Tok.getLocation().getLocWithOffset(SkipLen));
458*12c85518Srobert 
459e5dd7070Spatrick     // Lex all tokens in the rest of the line.
460e5dd7070Spatrick     for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
461e5dd7070Spatrick       if (Tok.is(comments::tok::newline)) {
462*12c85518Srobert         // Get the ending location of the comment line.
463*12c85518Srobert         PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
464*12c85518Srobert         if (End.getLine() != PreviousLine) {
465*12c85518Srobert           Result.emplace_back(Line, Begin, End);
466*12c85518Srobert           PreviousLine = End.getLine();
467*12c85518Srobert         }
468e5dd7070Spatrick         return true;
469e5dd7070Spatrick       }
470*12c85518Srobert       Line += L.getSpelling(Tok, SourceMgr);
471e5dd7070Spatrick     }
472*12c85518Srobert     PresumedLoc End = SourceMgr.getPresumedLoc(Tok.getLocation());
473*12c85518Srobert     Result.emplace_back(Line, Begin, End);
474e5dd7070Spatrick     // We've reached the end of file token.
475e5dd7070Spatrick     return false;
476e5dd7070Spatrick   };
477e5dd7070Spatrick 
478e5dd7070Spatrick   // Process first line separately to remember indent for the following lines.
479*12c85518Srobert   if (!LexLine(/*IsFirstLine=*/true))
480e5dd7070Spatrick     return Result;
481e5dd7070Spatrick   // Process the rest of the lines.
482e5dd7070Spatrick   while (LexLine(/*IsFirstLine=*/false))
483e5dd7070Spatrick     ;
484e5dd7070Spatrick   return Result;
485e5dd7070Spatrick }
486