xref: /netbsd-src/external/apache2/llvm/dist/clang/lib/Tooling/Transformer/SourceCode.cpp (revision e038c9c4676b0f19b1b7dd08a940c6ed64a6d5ae)
17330f729Sjoerg //===--- SourceCode.cpp - Source code manipulation routines -----*- C++ -*-===//
27330f729Sjoerg //
37330f729Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47330f729Sjoerg // See https://llvm.org/LICENSE.txt for license information.
57330f729Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67330f729Sjoerg //
77330f729Sjoerg //===----------------------------------------------------------------------===//
87330f729Sjoerg //
97330f729Sjoerg //  This file provides functions that simplify extraction of source code.
107330f729Sjoerg //
117330f729Sjoerg //===----------------------------------------------------------------------===//
127330f729Sjoerg #include "clang/Tooling/Transformer/SourceCode.h"
13*e038c9c4Sjoerg #include "clang/AST/ASTContext.h"
14*e038c9c4Sjoerg #include "clang/AST/Attr.h"
15*e038c9c4Sjoerg #include "clang/AST/Comment.h"
16*e038c9c4Sjoerg #include "clang/AST/Decl.h"
17*e038c9c4Sjoerg #include "clang/AST/DeclCXX.h"
18*e038c9c4Sjoerg #include "clang/AST/DeclTemplate.h"
19*e038c9c4Sjoerg #include "clang/AST/Expr.h"
20*e038c9c4Sjoerg #include "clang/Basic/SourceManager.h"
217330f729Sjoerg #include "clang/Lex/Lexer.h"
22*e038c9c4Sjoerg #include "llvm/Support/Errc.h"
23*e038c9c4Sjoerg #include "llvm/Support/Error.h"
24*e038c9c4Sjoerg #include <set>
257330f729Sjoerg 
267330f729Sjoerg using namespace clang;
277330f729Sjoerg 
28*e038c9c4Sjoerg using llvm::errc;
29*e038c9c4Sjoerg using llvm::StringError;
30*e038c9c4Sjoerg 
getText(CharSourceRange Range,const ASTContext & Context)317330f729Sjoerg StringRef clang::tooling::getText(CharSourceRange Range,
327330f729Sjoerg                                   const ASTContext &Context) {
337330f729Sjoerg   return Lexer::getSourceText(Range, Context.getSourceManager(),
347330f729Sjoerg                               Context.getLangOpts());
357330f729Sjoerg }
367330f729Sjoerg 
maybeExtendRange(CharSourceRange Range,tok::TokenKind Next,ASTContext & Context)377330f729Sjoerg CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range,
387330f729Sjoerg                                                  tok::TokenKind Next,
397330f729Sjoerg                                                  ASTContext &Context) {
40*e038c9c4Sjoerg   CharSourceRange R = Lexer::getAsCharRange(Range, Context.getSourceManager(),
41*e038c9c4Sjoerg                                             Context.getLangOpts());
42*e038c9c4Sjoerg   if (R.isInvalid())
437330f729Sjoerg     return Range;
44*e038c9c4Sjoerg   Token Tok;
45*e038c9c4Sjoerg   bool Err =
46*e038c9c4Sjoerg       Lexer::getRawToken(R.getEnd(), Tok, Context.getSourceManager(),
47*e038c9c4Sjoerg                          Context.getLangOpts(), /*IgnoreWhiteSpace=*/true);
48*e038c9c4Sjoerg   if (Err || !Tok.is(Next))
49*e038c9c4Sjoerg     return Range;
50*e038c9c4Sjoerg   return CharSourceRange::getTokenRange(Range.getBegin(), Tok.getLocation());
51*e038c9c4Sjoerg }
52*e038c9c4Sjoerg 
validateEditRange(const CharSourceRange & Range,const SourceManager & SM)53*e038c9c4Sjoerg llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range,
54*e038c9c4Sjoerg                                               const SourceManager &SM) {
55*e038c9c4Sjoerg   if (Range.isInvalid())
56*e038c9c4Sjoerg     return llvm::make_error<StringError>(errc::invalid_argument,
57*e038c9c4Sjoerg                                          "Invalid range");
58*e038c9c4Sjoerg 
59*e038c9c4Sjoerg   if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
60*e038c9c4Sjoerg     return llvm::make_error<StringError>(
61*e038c9c4Sjoerg         errc::invalid_argument, "Range starts or ends in a macro expansion");
62*e038c9c4Sjoerg 
63*e038c9c4Sjoerg   if (SM.isInSystemHeader(Range.getBegin()) ||
64*e038c9c4Sjoerg       SM.isInSystemHeader(Range.getEnd()))
65*e038c9c4Sjoerg     return llvm::make_error<StringError>(errc::invalid_argument,
66*e038c9c4Sjoerg                                          "Range is in system header");
67*e038c9c4Sjoerg 
68*e038c9c4Sjoerg   std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin());
69*e038c9c4Sjoerg   std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd());
70*e038c9c4Sjoerg   if (BeginInfo.first != EndInfo.first)
71*e038c9c4Sjoerg     return llvm::make_error<StringError>(
72*e038c9c4Sjoerg         errc::invalid_argument, "Range begins and ends in different files");
73*e038c9c4Sjoerg 
74*e038c9c4Sjoerg   if (BeginInfo.second > EndInfo.second)
75*e038c9c4Sjoerg     return llvm::make_error<StringError>(
76*e038c9c4Sjoerg         errc::invalid_argument, "Range's begin is past its end");
77*e038c9c4Sjoerg 
78*e038c9c4Sjoerg   return llvm::Error::success();
797330f729Sjoerg }
807330f729Sjoerg 
817330f729Sjoerg llvm::Optional<CharSourceRange>
getRangeForEdit(const CharSourceRange & EditRange,const SourceManager & SM,const LangOptions & LangOpts)827330f729Sjoerg clang::tooling::getRangeForEdit(const CharSourceRange &EditRange,
837330f729Sjoerg                                 const SourceManager &SM,
847330f729Sjoerg                                 const LangOptions &LangOpts) {
857330f729Sjoerg   // FIXME: makeFileCharRange() has the disadvantage of stripping off "identity"
867330f729Sjoerg   // macros. For example, if we're looking to rewrite the int literal 3 to 6,
877330f729Sjoerg   // and we have the following definition:
887330f729Sjoerg   //    #define DO_NOTHING(x) x
897330f729Sjoerg   // then
907330f729Sjoerg   //    foo(DO_NOTHING(3))
917330f729Sjoerg   // will be rewritten to
927330f729Sjoerg   //    foo(6)
937330f729Sjoerg   // rather than the arguably better
947330f729Sjoerg   //    foo(DO_NOTHING(6))
957330f729Sjoerg   // Decide whether the current behavior is desirable and modify if not.
967330f729Sjoerg   CharSourceRange Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts);
97*e038c9c4Sjoerg   bool IsInvalid = llvm::errorToBool(validateEditRange(Range, SM));
98*e038c9c4Sjoerg   if (IsInvalid)
99*e038c9c4Sjoerg     return llvm::None;
1007330f729Sjoerg   return Range;
101*e038c9c4Sjoerg 
102*e038c9c4Sjoerg }
103*e038c9c4Sjoerg 
startsWithNewline(const SourceManager & SM,const Token & Tok)104*e038c9c4Sjoerg static bool startsWithNewline(const SourceManager &SM, const Token &Tok) {
105*e038c9c4Sjoerg   return isVerticalWhitespace(SM.getCharacterData(Tok.getLocation())[0]);
106*e038c9c4Sjoerg }
107*e038c9c4Sjoerg 
contains(const std::set<tok::TokenKind> & Terminators,const Token & Tok)108*e038c9c4Sjoerg static bool contains(const std::set<tok::TokenKind> &Terminators,
109*e038c9c4Sjoerg                      const Token &Tok) {
110*e038c9c4Sjoerg   return Terminators.count(Tok.getKind()) > 0;
111*e038c9c4Sjoerg }
112*e038c9c4Sjoerg 
113*e038c9c4Sjoerg // Returns the exclusive, *file* end location of the entity whose last token is
114*e038c9c4Sjoerg // at location 'EntityLast'. That is, it returns the location one past the last
115*e038c9c4Sjoerg // relevant character.
116*e038c9c4Sjoerg //
117*e038c9c4Sjoerg // Associated tokens include comments, horizontal whitespace and 'Terminators'
118*e038c9c4Sjoerg // -- optional tokens, which, if any are found, will be included; if
119*e038c9c4Sjoerg // 'Terminators' is empty, we will not include any extra tokens beyond comments
120*e038c9c4Sjoerg // and horizontal whitespace.
121*e038c9c4Sjoerg static SourceLocation
getEntityEndLoc(const SourceManager & SM,SourceLocation EntityLast,const std::set<tok::TokenKind> & Terminators,const LangOptions & LangOpts)122*e038c9c4Sjoerg getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast,
123*e038c9c4Sjoerg                 const std::set<tok::TokenKind> &Terminators,
124*e038c9c4Sjoerg                 const LangOptions &LangOpts) {
125*e038c9c4Sjoerg   assert(EntityLast.isValid() && "Invalid end location found.");
126*e038c9c4Sjoerg 
127*e038c9c4Sjoerg   // We remember the last location of a non-horizontal-whitespace token we have
128*e038c9c4Sjoerg   // lexed; this is the location up to which we will want to delete.
129*e038c9c4Sjoerg   // FIXME: Support using the spelling loc here for cases where we want to
130*e038c9c4Sjoerg   // analyze the macro text.
131*e038c9c4Sjoerg 
132*e038c9c4Sjoerg   CharSourceRange ExpansionRange = SM.getExpansionRange(EntityLast);
133*e038c9c4Sjoerg   // FIXME: Should check isTokenRange(), for the (rare) case that
134*e038c9c4Sjoerg   // `ExpansionRange` is a character range.
135*e038c9c4Sjoerg   std::unique_ptr<Lexer> Lexer = [&]() {
136*e038c9c4Sjoerg     bool Invalid = false;
137*e038c9c4Sjoerg     auto FileOffset = SM.getDecomposedLoc(ExpansionRange.getEnd());
138*e038c9c4Sjoerg     llvm::StringRef File = SM.getBufferData(FileOffset.first, &Invalid);
139*e038c9c4Sjoerg     assert(!Invalid && "Cannot get file/offset");
140*e038c9c4Sjoerg     return std::make_unique<clang::Lexer>(
141*e038c9c4Sjoerg         SM.getLocForStartOfFile(FileOffset.first), LangOpts, File.begin(),
142*e038c9c4Sjoerg         File.data() + FileOffset.second, File.end());
143*e038c9c4Sjoerg   }();
144*e038c9c4Sjoerg 
145*e038c9c4Sjoerg   // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown).
146*e038c9c4Sjoerg   Lexer->SetKeepWhitespaceMode(true);
147*e038c9c4Sjoerg 
148*e038c9c4Sjoerg   // Generally, the code we want to include looks like this ([] are optional),
149*e038c9c4Sjoerg   // If Terminators is empty:
150*e038c9c4Sjoerg   //   [ <comment> ] [ <newline> ]
151*e038c9c4Sjoerg   // Otherwise:
152*e038c9c4Sjoerg   //   ... <terminator> [ <comment> ] [ <newline> ]
153*e038c9c4Sjoerg 
154*e038c9c4Sjoerg   Token Tok;
155*e038c9c4Sjoerg   bool Terminated = false;
156*e038c9c4Sjoerg 
157*e038c9c4Sjoerg   // First, lex to the current token (which is the last token of the range that
158*e038c9c4Sjoerg   // is definitely associated with the decl). Then, we process the first token
159*e038c9c4Sjoerg   // separately from the rest based on conditions that hold specifically for
160*e038c9c4Sjoerg   // that first token.
161*e038c9c4Sjoerg   //
162*e038c9c4Sjoerg   // We do not search for a terminator if none is required or we've already
163*e038c9c4Sjoerg   // encountered it. Otherwise, if the original `EntityLast` location was in a
164*e038c9c4Sjoerg   // macro expansion, we don't have visibility into the text, so we assume we've
165*e038c9c4Sjoerg   // already terminated. However, we note this assumption with
166*e038c9c4Sjoerg   // `TerminatedByMacro`, because we'll want to handle it somewhat differently
167*e038c9c4Sjoerg   // for the terminators semicolon and comma. These terminators can be safely
168*e038c9c4Sjoerg   // associated with the entity when they appear after the macro -- extra
169*e038c9c4Sjoerg   // semicolons have no effect on the program and a well-formed program won't
170*e038c9c4Sjoerg   // have multiple commas in a row, so we're guaranteed that there is only one.
171*e038c9c4Sjoerg   //
172*e038c9c4Sjoerg   // FIXME: This handling of macros is more conservative than necessary. When
173*e038c9c4Sjoerg   // the end of the expansion coincides with the end of the node, we can still
174*e038c9c4Sjoerg   // safely analyze the code. But, it is more complicated, because we need to
175*e038c9c4Sjoerg   // start by lexing the spelling loc for the first token and then switch to the
176*e038c9c4Sjoerg   // expansion loc.
177*e038c9c4Sjoerg   bool TerminatedByMacro = false;
178*e038c9c4Sjoerg   Lexer->LexFromRawLexer(Tok);
179*e038c9c4Sjoerg   if (Terminators.empty() || contains(Terminators, Tok))
180*e038c9c4Sjoerg     Terminated = true;
181*e038c9c4Sjoerg   else if (EntityLast.isMacroID()) {
182*e038c9c4Sjoerg     Terminated = true;
183*e038c9c4Sjoerg     TerminatedByMacro = true;
184*e038c9c4Sjoerg   }
185*e038c9c4Sjoerg 
186*e038c9c4Sjoerg   // We save the most recent candidate for the exclusive end location.
187*e038c9c4Sjoerg   SourceLocation End = Tok.getEndLoc();
188*e038c9c4Sjoerg 
189*e038c9c4Sjoerg   while (!Terminated) {
190*e038c9c4Sjoerg     // Lex the next token we want to possibly expand the range with.
191*e038c9c4Sjoerg     Lexer->LexFromRawLexer(Tok);
192*e038c9c4Sjoerg 
193*e038c9c4Sjoerg     switch (Tok.getKind()) {
194*e038c9c4Sjoerg     case tok::eof:
195*e038c9c4Sjoerg     // Unexpected separators.
196*e038c9c4Sjoerg     case tok::l_brace:
197*e038c9c4Sjoerg     case tok::r_brace:
198*e038c9c4Sjoerg     case tok::comma:
199*e038c9c4Sjoerg       return End;
200*e038c9c4Sjoerg     // Whitespace pseudo-tokens.
201*e038c9c4Sjoerg     case tok::unknown:
202*e038c9c4Sjoerg       if (startsWithNewline(SM, Tok))
203*e038c9c4Sjoerg         // Include at least until the end of the line.
204*e038c9c4Sjoerg         End = Tok.getEndLoc();
205*e038c9c4Sjoerg       break;
206*e038c9c4Sjoerg     default:
207*e038c9c4Sjoerg       if (contains(Terminators, Tok))
208*e038c9c4Sjoerg         Terminated = true;
209*e038c9c4Sjoerg       End = Tok.getEndLoc();
210*e038c9c4Sjoerg       break;
211*e038c9c4Sjoerg     }
212*e038c9c4Sjoerg   }
213*e038c9c4Sjoerg 
214*e038c9c4Sjoerg   do {
215*e038c9c4Sjoerg     // Lex the next token we want to possibly expand the range with.
216*e038c9c4Sjoerg     Lexer->LexFromRawLexer(Tok);
217*e038c9c4Sjoerg 
218*e038c9c4Sjoerg     switch (Tok.getKind()) {
219*e038c9c4Sjoerg     case tok::unknown:
220*e038c9c4Sjoerg       if (startsWithNewline(SM, Tok))
221*e038c9c4Sjoerg         // We're done, but include this newline.
222*e038c9c4Sjoerg         return Tok.getEndLoc();
223*e038c9c4Sjoerg       break;
224*e038c9c4Sjoerg     case tok::comment:
225*e038c9c4Sjoerg       // Include any comments we find on the way.
226*e038c9c4Sjoerg       End = Tok.getEndLoc();
227*e038c9c4Sjoerg       break;
228*e038c9c4Sjoerg     case tok::semi:
229*e038c9c4Sjoerg     case tok::comma:
230*e038c9c4Sjoerg       if (TerminatedByMacro && contains(Terminators, Tok)) {
231*e038c9c4Sjoerg         End = Tok.getEndLoc();
232*e038c9c4Sjoerg         // We've found a real terminator.
233*e038c9c4Sjoerg         TerminatedByMacro = false;
234*e038c9c4Sjoerg         break;
235*e038c9c4Sjoerg       }
236*e038c9c4Sjoerg       // Found an unrelated token; stop and don't include it.
237*e038c9c4Sjoerg       return End;
238*e038c9c4Sjoerg     default:
239*e038c9c4Sjoerg       // Found an unrelated token; stop and don't include it.
240*e038c9c4Sjoerg       return End;
241*e038c9c4Sjoerg     }
242*e038c9c4Sjoerg   } while (true);
243*e038c9c4Sjoerg }
244*e038c9c4Sjoerg 
245*e038c9c4Sjoerg // Returns the expected terminator tokens for the given declaration.
246*e038c9c4Sjoerg //
247*e038c9c4Sjoerg // If we do not know the correct terminator token, returns an empty set.
248*e038c9c4Sjoerg //
249*e038c9c4Sjoerg // There are cases where we have more than one possible terminator (for example,
250*e038c9c4Sjoerg // we find either a comma or a semicolon after a VarDecl).
getTerminators(const Decl & D)251*e038c9c4Sjoerg static std::set<tok::TokenKind> getTerminators(const Decl &D) {
252*e038c9c4Sjoerg   if (llvm::isa<RecordDecl>(D) || llvm::isa<UsingDecl>(D))
253*e038c9c4Sjoerg     return {tok::semi};
254*e038c9c4Sjoerg 
255*e038c9c4Sjoerg   if (llvm::isa<FunctionDecl>(D) || llvm::isa<LinkageSpecDecl>(D))
256*e038c9c4Sjoerg     return {tok::r_brace, tok::semi};
257*e038c9c4Sjoerg 
258*e038c9c4Sjoerg   if (llvm::isa<VarDecl>(D) || llvm::isa<FieldDecl>(D))
259*e038c9c4Sjoerg     return {tok::comma, tok::semi};
260*e038c9c4Sjoerg 
261*e038c9c4Sjoerg   return {};
262*e038c9c4Sjoerg }
263*e038c9c4Sjoerg 
264*e038c9c4Sjoerg // Starting from `Loc`, skips whitespace up to, and including, a single
265*e038c9c4Sjoerg // newline. Returns the (exclusive) end of any skipped whitespace (that is, the
266*e038c9c4Sjoerg // location immediately after the whitespace).
skipWhitespaceAndNewline(const SourceManager & SM,SourceLocation Loc,const LangOptions & LangOpts)267*e038c9c4Sjoerg static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM,
268*e038c9c4Sjoerg                                                SourceLocation Loc,
269*e038c9c4Sjoerg                                                const LangOptions &LangOpts) {
270*e038c9c4Sjoerg   const char *LocChars = SM.getCharacterData(Loc);
271*e038c9c4Sjoerg   int i = 0;
272*e038c9c4Sjoerg   while (isHorizontalWhitespace(LocChars[i]))
273*e038c9c4Sjoerg     ++i;
274*e038c9c4Sjoerg   if (isVerticalWhitespace(LocChars[i]))
275*e038c9c4Sjoerg     ++i;
276*e038c9c4Sjoerg   return Loc.getLocWithOffset(i);
277*e038c9c4Sjoerg }
278*e038c9c4Sjoerg 
279*e038c9c4Sjoerg // Is `Loc` separated from any following decl by something meaningful (e.g. an
280*e038c9c4Sjoerg // empty line, a comment), ignoring horizontal whitespace?  Since this is a
281*e038c9c4Sjoerg // heuristic, we return false when in doubt.  `Loc` cannot be the first location
282*e038c9c4Sjoerg // in the file.
atOrBeforeSeparation(const SourceManager & SM,SourceLocation Loc,const LangOptions & LangOpts)283*e038c9c4Sjoerg static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc,
284*e038c9c4Sjoerg                                  const LangOptions &LangOpts) {
285*e038c9c4Sjoerg   // If the preceding character is a newline, we'll check for an empty line as a
286*e038c9c4Sjoerg   // separator. However, we can't identify an empty line using tokens, so we
287*e038c9c4Sjoerg   // analyse the characters. If we try to use tokens, we'll just end up with a
288*e038c9c4Sjoerg   // whitespace token, whose characters we'd have to analyse anyhow.
289*e038c9c4Sjoerg   bool Invalid = false;
290*e038c9c4Sjoerg   const char *LocChars =
291*e038c9c4Sjoerg       SM.getCharacterData(Loc.getLocWithOffset(-1), &Invalid);
292*e038c9c4Sjoerg   assert(!Invalid &&
293*e038c9c4Sjoerg          "Loc must be a valid character and not the first of the source file.");
294*e038c9c4Sjoerg   if (isVerticalWhitespace(LocChars[0])) {
295*e038c9c4Sjoerg     for (int i = 1; isWhitespace(LocChars[i]); ++i)
296*e038c9c4Sjoerg       if (isVerticalWhitespace(LocChars[i]))
297*e038c9c4Sjoerg         return true;
298*e038c9c4Sjoerg   }
299*e038c9c4Sjoerg   // We didn't find an empty line, so lex the next token, skipping past any
300*e038c9c4Sjoerg   // whitespace we just scanned.
301*e038c9c4Sjoerg   Token Tok;
302*e038c9c4Sjoerg   bool Failed = Lexer::getRawToken(Loc, Tok, SM, LangOpts,
303*e038c9c4Sjoerg                                    /*IgnoreWhiteSpace=*/true);
304*e038c9c4Sjoerg   if (Failed)
305*e038c9c4Sjoerg     // Any text that confuses the lexer seems fair to consider a separation.
306*e038c9c4Sjoerg     return true;
307*e038c9c4Sjoerg 
308*e038c9c4Sjoerg   switch (Tok.getKind()) {
309*e038c9c4Sjoerg   case tok::comment:
310*e038c9c4Sjoerg   case tok::l_brace:
311*e038c9c4Sjoerg   case tok::r_brace:
312*e038c9c4Sjoerg   case tok::eof:
313*e038c9c4Sjoerg     return true;
314*e038c9c4Sjoerg   default:
315*e038c9c4Sjoerg     return false;
316*e038c9c4Sjoerg   }
317*e038c9c4Sjoerg }
318*e038c9c4Sjoerg 
getAssociatedRange(const Decl & Decl,ASTContext & Context)319*e038c9c4Sjoerg CharSourceRange tooling::getAssociatedRange(const Decl &Decl,
320*e038c9c4Sjoerg                                             ASTContext &Context) {
321*e038c9c4Sjoerg   const SourceManager &SM = Context.getSourceManager();
322*e038c9c4Sjoerg   const LangOptions &LangOpts = Context.getLangOpts();
323*e038c9c4Sjoerg   CharSourceRange Range = CharSourceRange::getTokenRange(Decl.getSourceRange());
324*e038c9c4Sjoerg 
325*e038c9c4Sjoerg   // First, expand to the start of the template<> declaration if necessary.
326*e038c9c4Sjoerg   if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(&Decl)) {
327*e038c9c4Sjoerg     if (const auto *T = Record->getDescribedClassTemplate())
328*e038c9c4Sjoerg       if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
329*e038c9c4Sjoerg         Range.setBegin(T->getBeginLoc());
330*e038c9c4Sjoerg   } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(&Decl)) {
331*e038c9c4Sjoerg     if (const auto *T = F->getDescribedFunctionTemplate())
332*e038c9c4Sjoerg       if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
333*e038c9c4Sjoerg         Range.setBegin(T->getBeginLoc());
334*e038c9c4Sjoerg   }
335*e038c9c4Sjoerg 
336*e038c9c4Sjoerg   // Next, expand the end location past trailing comments to include a potential
337*e038c9c4Sjoerg   // newline at the end of the decl's line.
338*e038c9c4Sjoerg   Range.setEnd(
339*e038c9c4Sjoerg       getEntityEndLoc(SM, Decl.getEndLoc(), getTerminators(Decl), LangOpts));
340*e038c9c4Sjoerg   Range.setTokenRange(false);
341*e038c9c4Sjoerg 
342*e038c9c4Sjoerg   // Expand to include preceeding associated comments. We ignore any comments
343*e038c9c4Sjoerg   // that are not preceeding the decl, since we've already skipped trailing
344*e038c9c4Sjoerg   // comments with getEntityEndLoc.
345*e038c9c4Sjoerg   if (const RawComment *Comment =
346*e038c9c4Sjoerg           Decl.getASTContext().getRawCommentForDeclNoCache(&Decl))
347*e038c9c4Sjoerg     // Only include a preceding comment if:
348*e038c9c4Sjoerg     // * it is *not* separate from the declaration (not including any newline
349*e038c9c4Sjoerg     //   that immediately follows the comment),
350*e038c9c4Sjoerg     // * the decl *is* separate from any following entity (so, there are no
351*e038c9c4Sjoerg     //   other entities the comment could refer to), and
352*e038c9c4Sjoerg     // * it is not a IfThisThenThat lint check.
353*e038c9c4Sjoerg     if (SM.isBeforeInTranslationUnit(Comment->getBeginLoc(),
354*e038c9c4Sjoerg                                      Range.getBegin()) &&
355*e038c9c4Sjoerg         !atOrBeforeSeparation(
356*e038c9c4Sjoerg             SM, skipWhitespaceAndNewline(SM, Comment->getEndLoc(), LangOpts),
357*e038c9c4Sjoerg             LangOpts) &&
358*e038c9c4Sjoerg         atOrBeforeSeparation(SM, Range.getEnd(), LangOpts)) {
359*e038c9c4Sjoerg       const StringRef CommentText = Comment->getRawText(SM);
360*e038c9c4Sjoerg       if (!CommentText.contains("LINT.IfChange") &&
361*e038c9c4Sjoerg           !CommentText.contains("LINT.ThenChange"))
362*e038c9c4Sjoerg         Range.setBegin(Comment->getBeginLoc());
363*e038c9c4Sjoerg     }
364*e038c9c4Sjoerg   // Add leading attributes.
365*e038c9c4Sjoerg   for (auto *Attr : Decl.attrs()) {
366*e038c9c4Sjoerg     if (Attr->getLocation().isInvalid() ||
367*e038c9c4Sjoerg         !SM.isBeforeInTranslationUnit(Attr->getLocation(), Range.getBegin()))
368*e038c9c4Sjoerg       continue;
369*e038c9c4Sjoerg     Range.setBegin(Attr->getLocation());
370*e038c9c4Sjoerg 
371*e038c9c4Sjoerg     // Extend to the left '[[' or '__attribute((' if we saw the attribute,
372*e038c9c4Sjoerg     // unless it is not a valid location.
373*e038c9c4Sjoerg     bool Invalid;
374*e038c9c4Sjoerg     StringRef Source =
375*e038c9c4Sjoerg         SM.getBufferData(SM.getFileID(Range.getBegin()), &Invalid);
376*e038c9c4Sjoerg     if (Invalid)
377*e038c9c4Sjoerg       continue;
378*e038c9c4Sjoerg     llvm::StringRef BeforeAttr =
379*e038c9c4Sjoerg         Source.substr(0, SM.getFileOffset(Range.getBegin()));
380*e038c9c4Sjoerg     llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim();
381*e038c9c4Sjoerg 
382*e038c9c4Sjoerg     for (llvm::StringRef Prefix : {"[[", "__attribute__(("}) {
383*e038c9c4Sjoerg       // Handle whitespace between attribute prefix and attribute value.
384*e038c9c4Sjoerg       if (BeforeAttrStripped.endswith(Prefix)) {
385*e038c9c4Sjoerg         // Move start to start position of prefix, which is
386*e038c9c4Sjoerg         // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix)
387*e038c9c4Sjoerg         // positions to the left.
388*e038c9c4Sjoerg         Range.setBegin(Range.getBegin().getLocWithOffset(static_cast<int>(
389*e038c9c4Sjoerg             -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size())));
390*e038c9c4Sjoerg         break;
391*e038c9c4Sjoerg         // If we didn't see '[[' or '__attribute' it's probably coming from a
392*e038c9c4Sjoerg         // macro expansion which is already handled by makeFileCharRange(),
393*e038c9c4Sjoerg         // below.
394*e038c9c4Sjoerg       }
395*e038c9c4Sjoerg     }
396*e038c9c4Sjoerg   }
397*e038c9c4Sjoerg 
398*e038c9c4Sjoerg   // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But,
399*e038c9c4Sjoerg   // Range.getBegin() may be inside an expansion.
400*e038c9c4Sjoerg   return Lexer::makeFileCharRange(Range, SM, LangOpts);
4017330f729Sjoerg }
402