//===--- SourceCode.cpp - Source code manipulation routines -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides functions that simplify extraction of source code.
//
//===----------------------------------------------------------------------===//
127330f729Sjoerg #include "clang/Tooling/Transformer/SourceCode.h"
13*e038c9c4Sjoerg #include "clang/AST/ASTContext.h"
14*e038c9c4Sjoerg #include "clang/AST/Attr.h"
15*e038c9c4Sjoerg #include "clang/AST/Comment.h"
16*e038c9c4Sjoerg #include "clang/AST/Decl.h"
17*e038c9c4Sjoerg #include "clang/AST/DeclCXX.h"
18*e038c9c4Sjoerg #include "clang/AST/DeclTemplate.h"
19*e038c9c4Sjoerg #include "clang/AST/Expr.h"
20*e038c9c4Sjoerg #include "clang/Basic/SourceManager.h"
217330f729Sjoerg #include "clang/Lex/Lexer.h"
22*e038c9c4Sjoerg #include "llvm/Support/Errc.h"
23*e038c9c4Sjoerg #include "llvm/Support/Error.h"
24*e038c9c4Sjoerg #include <set>
257330f729Sjoerg
267330f729Sjoerg using namespace clang;
277330f729Sjoerg
28*e038c9c4Sjoerg using llvm::errc;
29*e038c9c4Sjoerg using llvm::StringError;
30*e038c9c4Sjoerg
getText(CharSourceRange Range,const ASTContext & Context)317330f729Sjoerg StringRef clang::tooling::getText(CharSourceRange Range,
327330f729Sjoerg const ASTContext &Context) {
337330f729Sjoerg return Lexer::getSourceText(Range, Context.getSourceManager(),
347330f729Sjoerg Context.getLangOpts());
357330f729Sjoerg }
367330f729Sjoerg
maybeExtendRange(CharSourceRange Range,tok::TokenKind Next,ASTContext & Context)377330f729Sjoerg CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range,
387330f729Sjoerg tok::TokenKind Next,
397330f729Sjoerg ASTContext &Context) {
40*e038c9c4Sjoerg CharSourceRange R = Lexer::getAsCharRange(Range, Context.getSourceManager(),
41*e038c9c4Sjoerg Context.getLangOpts());
42*e038c9c4Sjoerg if (R.isInvalid())
437330f729Sjoerg return Range;
44*e038c9c4Sjoerg Token Tok;
45*e038c9c4Sjoerg bool Err =
46*e038c9c4Sjoerg Lexer::getRawToken(R.getEnd(), Tok, Context.getSourceManager(),
47*e038c9c4Sjoerg Context.getLangOpts(), /*IgnoreWhiteSpace=*/true);
48*e038c9c4Sjoerg if (Err || !Tok.is(Next))
49*e038c9c4Sjoerg return Range;
50*e038c9c4Sjoerg return CharSourceRange::getTokenRange(Range.getBegin(), Tok.getLocation());
51*e038c9c4Sjoerg }
52*e038c9c4Sjoerg
validateEditRange(const CharSourceRange & Range,const SourceManager & SM)53*e038c9c4Sjoerg llvm::Error clang::tooling::validateEditRange(const CharSourceRange &Range,
54*e038c9c4Sjoerg const SourceManager &SM) {
55*e038c9c4Sjoerg if (Range.isInvalid())
56*e038c9c4Sjoerg return llvm::make_error<StringError>(errc::invalid_argument,
57*e038c9c4Sjoerg "Invalid range");
58*e038c9c4Sjoerg
59*e038c9c4Sjoerg if (Range.getBegin().isMacroID() || Range.getEnd().isMacroID())
60*e038c9c4Sjoerg return llvm::make_error<StringError>(
61*e038c9c4Sjoerg errc::invalid_argument, "Range starts or ends in a macro expansion");
62*e038c9c4Sjoerg
63*e038c9c4Sjoerg if (SM.isInSystemHeader(Range.getBegin()) ||
64*e038c9c4Sjoerg SM.isInSystemHeader(Range.getEnd()))
65*e038c9c4Sjoerg return llvm::make_error<StringError>(errc::invalid_argument,
66*e038c9c4Sjoerg "Range is in system header");
67*e038c9c4Sjoerg
68*e038c9c4Sjoerg std::pair<FileID, unsigned> BeginInfo = SM.getDecomposedLoc(Range.getBegin());
69*e038c9c4Sjoerg std::pair<FileID, unsigned> EndInfo = SM.getDecomposedLoc(Range.getEnd());
70*e038c9c4Sjoerg if (BeginInfo.first != EndInfo.first)
71*e038c9c4Sjoerg return llvm::make_error<StringError>(
72*e038c9c4Sjoerg errc::invalid_argument, "Range begins and ends in different files");
73*e038c9c4Sjoerg
74*e038c9c4Sjoerg if (BeginInfo.second > EndInfo.second)
75*e038c9c4Sjoerg return llvm::make_error<StringError>(
76*e038c9c4Sjoerg errc::invalid_argument, "Range's begin is past its end");
77*e038c9c4Sjoerg
78*e038c9c4Sjoerg return llvm::Error::success();
797330f729Sjoerg }
807330f729Sjoerg
817330f729Sjoerg llvm::Optional<CharSourceRange>
getRangeForEdit(const CharSourceRange & EditRange,const SourceManager & SM,const LangOptions & LangOpts)827330f729Sjoerg clang::tooling::getRangeForEdit(const CharSourceRange &EditRange,
837330f729Sjoerg const SourceManager &SM,
847330f729Sjoerg const LangOptions &LangOpts) {
857330f729Sjoerg // FIXME: makeFileCharRange() has the disadvantage of stripping off "identity"
867330f729Sjoerg // macros. For example, if we're looking to rewrite the int literal 3 to 6,
877330f729Sjoerg // and we have the following definition:
887330f729Sjoerg // #define DO_NOTHING(x) x
897330f729Sjoerg // then
907330f729Sjoerg // foo(DO_NOTHING(3))
917330f729Sjoerg // will be rewritten to
927330f729Sjoerg // foo(6)
937330f729Sjoerg // rather than the arguably better
947330f729Sjoerg // foo(DO_NOTHING(6))
957330f729Sjoerg // Decide whether the current behavior is desirable and modify if not.
967330f729Sjoerg CharSourceRange Range = Lexer::makeFileCharRange(EditRange, SM, LangOpts);
97*e038c9c4Sjoerg bool IsInvalid = llvm::errorToBool(validateEditRange(Range, SM));
98*e038c9c4Sjoerg if (IsInvalid)
99*e038c9c4Sjoerg return llvm::None;
1007330f729Sjoerg return Range;
101*e038c9c4Sjoerg
102*e038c9c4Sjoerg }
103*e038c9c4Sjoerg
startsWithNewline(const SourceManager & SM,const Token & Tok)104*e038c9c4Sjoerg static bool startsWithNewline(const SourceManager &SM, const Token &Tok) {
105*e038c9c4Sjoerg return isVerticalWhitespace(SM.getCharacterData(Tok.getLocation())[0]);
106*e038c9c4Sjoerg }
107*e038c9c4Sjoerg
contains(const std::set<tok::TokenKind> & Terminators,const Token & Tok)108*e038c9c4Sjoerg static bool contains(const std::set<tok::TokenKind> &Terminators,
109*e038c9c4Sjoerg const Token &Tok) {
110*e038c9c4Sjoerg return Terminators.count(Tok.getKind()) > 0;
111*e038c9c4Sjoerg }
112*e038c9c4Sjoerg
113*e038c9c4Sjoerg // Returns the exclusive, *file* end location of the entity whose last token is
114*e038c9c4Sjoerg // at location 'EntityLast'. That is, it returns the location one past the last
115*e038c9c4Sjoerg // relevant character.
116*e038c9c4Sjoerg //
117*e038c9c4Sjoerg // Associated tokens include comments, horizontal whitespace and 'Terminators'
118*e038c9c4Sjoerg // -- optional tokens, which, if any are found, will be included; if
119*e038c9c4Sjoerg // 'Terminators' is empty, we will not include any extra tokens beyond comments
120*e038c9c4Sjoerg // and horizontal whitespace.
121*e038c9c4Sjoerg static SourceLocation
getEntityEndLoc(const SourceManager & SM,SourceLocation EntityLast,const std::set<tok::TokenKind> & Terminators,const LangOptions & LangOpts)122*e038c9c4Sjoerg getEntityEndLoc(const SourceManager &SM, SourceLocation EntityLast,
123*e038c9c4Sjoerg const std::set<tok::TokenKind> &Terminators,
124*e038c9c4Sjoerg const LangOptions &LangOpts) {
125*e038c9c4Sjoerg assert(EntityLast.isValid() && "Invalid end location found.");
126*e038c9c4Sjoerg
127*e038c9c4Sjoerg // We remember the last location of a non-horizontal-whitespace token we have
128*e038c9c4Sjoerg // lexed; this is the location up to which we will want to delete.
129*e038c9c4Sjoerg // FIXME: Support using the spelling loc here for cases where we want to
130*e038c9c4Sjoerg // analyze the macro text.
131*e038c9c4Sjoerg
132*e038c9c4Sjoerg CharSourceRange ExpansionRange = SM.getExpansionRange(EntityLast);
133*e038c9c4Sjoerg // FIXME: Should check isTokenRange(), for the (rare) case that
134*e038c9c4Sjoerg // `ExpansionRange` is a character range.
135*e038c9c4Sjoerg std::unique_ptr<Lexer> Lexer = [&]() {
136*e038c9c4Sjoerg bool Invalid = false;
137*e038c9c4Sjoerg auto FileOffset = SM.getDecomposedLoc(ExpansionRange.getEnd());
138*e038c9c4Sjoerg llvm::StringRef File = SM.getBufferData(FileOffset.first, &Invalid);
139*e038c9c4Sjoerg assert(!Invalid && "Cannot get file/offset");
140*e038c9c4Sjoerg return std::make_unique<clang::Lexer>(
141*e038c9c4Sjoerg SM.getLocForStartOfFile(FileOffset.first), LangOpts, File.begin(),
142*e038c9c4Sjoerg File.data() + FileOffset.second, File.end());
143*e038c9c4Sjoerg }();
144*e038c9c4Sjoerg
145*e038c9c4Sjoerg // Tell Lexer to return whitespace as pseudo-tokens (kind is tok::unknown).
146*e038c9c4Sjoerg Lexer->SetKeepWhitespaceMode(true);
147*e038c9c4Sjoerg
148*e038c9c4Sjoerg // Generally, the code we want to include looks like this ([] are optional),
149*e038c9c4Sjoerg // If Terminators is empty:
150*e038c9c4Sjoerg // [ <comment> ] [ <newline> ]
151*e038c9c4Sjoerg // Otherwise:
152*e038c9c4Sjoerg // ... <terminator> [ <comment> ] [ <newline> ]
153*e038c9c4Sjoerg
154*e038c9c4Sjoerg Token Tok;
155*e038c9c4Sjoerg bool Terminated = false;
156*e038c9c4Sjoerg
157*e038c9c4Sjoerg // First, lex to the current token (which is the last token of the range that
158*e038c9c4Sjoerg // is definitely associated with the decl). Then, we process the first token
159*e038c9c4Sjoerg // separately from the rest based on conditions that hold specifically for
160*e038c9c4Sjoerg // that first token.
161*e038c9c4Sjoerg //
162*e038c9c4Sjoerg // We do not search for a terminator if none is required or we've already
163*e038c9c4Sjoerg // encountered it. Otherwise, if the original `EntityLast` location was in a
164*e038c9c4Sjoerg // macro expansion, we don't have visibility into the text, so we assume we've
165*e038c9c4Sjoerg // already terminated. However, we note this assumption with
166*e038c9c4Sjoerg // `TerminatedByMacro`, because we'll want to handle it somewhat differently
167*e038c9c4Sjoerg // for the terminators semicolon and comma. These terminators can be safely
168*e038c9c4Sjoerg // associated with the entity when they appear after the macro -- extra
169*e038c9c4Sjoerg // semicolons have no effect on the program and a well-formed program won't
170*e038c9c4Sjoerg // have multiple commas in a row, so we're guaranteed that there is only one.
171*e038c9c4Sjoerg //
172*e038c9c4Sjoerg // FIXME: This handling of macros is more conservative than necessary. When
173*e038c9c4Sjoerg // the end of the expansion coincides with the end of the node, we can still
174*e038c9c4Sjoerg // safely analyze the code. But, it is more complicated, because we need to
175*e038c9c4Sjoerg // start by lexing the spelling loc for the first token and then switch to the
176*e038c9c4Sjoerg // expansion loc.
177*e038c9c4Sjoerg bool TerminatedByMacro = false;
178*e038c9c4Sjoerg Lexer->LexFromRawLexer(Tok);
179*e038c9c4Sjoerg if (Terminators.empty() || contains(Terminators, Tok))
180*e038c9c4Sjoerg Terminated = true;
181*e038c9c4Sjoerg else if (EntityLast.isMacroID()) {
182*e038c9c4Sjoerg Terminated = true;
183*e038c9c4Sjoerg TerminatedByMacro = true;
184*e038c9c4Sjoerg }
185*e038c9c4Sjoerg
186*e038c9c4Sjoerg // We save the most recent candidate for the exclusive end location.
187*e038c9c4Sjoerg SourceLocation End = Tok.getEndLoc();
188*e038c9c4Sjoerg
189*e038c9c4Sjoerg while (!Terminated) {
190*e038c9c4Sjoerg // Lex the next token we want to possibly expand the range with.
191*e038c9c4Sjoerg Lexer->LexFromRawLexer(Tok);
192*e038c9c4Sjoerg
193*e038c9c4Sjoerg switch (Tok.getKind()) {
194*e038c9c4Sjoerg case tok::eof:
195*e038c9c4Sjoerg // Unexpected separators.
196*e038c9c4Sjoerg case tok::l_brace:
197*e038c9c4Sjoerg case tok::r_brace:
198*e038c9c4Sjoerg case tok::comma:
199*e038c9c4Sjoerg return End;
200*e038c9c4Sjoerg // Whitespace pseudo-tokens.
201*e038c9c4Sjoerg case tok::unknown:
202*e038c9c4Sjoerg if (startsWithNewline(SM, Tok))
203*e038c9c4Sjoerg // Include at least until the end of the line.
204*e038c9c4Sjoerg End = Tok.getEndLoc();
205*e038c9c4Sjoerg break;
206*e038c9c4Sjoerg default:
207*e038c9c4Sjoerg if (contains(Terminators, Tok))
208*e038c9c4Sjoerg Terminated = true;
209*e038c9c4Sjoerg End = Tok.getEndLoc();
210*e038c9c4Sjoerg break;
211*e038c9c4Sjoerg }
212*e038c9c4Sjoerg }
213*e038c9c4Sjoerg
214*e038c9c4Sjoerg do {
215*e038c9c4Sjoerg // Lex the next token we want to possibly expand the range with.
216*e038c9c4Sjoerg Lexer->LexFromRawLexer(Tok);
217*e038c9c4Sjoerg
218*e038c9c4Sjoerg switch (Tok.getKind()) {
219*e038c9c4Sjoerg case tok::unknown:
220*e038c9c4Sjoerg if (startsWithNewline(SM, Tok))
221*e038c9c4Sjoerg // We're done, but include this newline.
222*e038c9c4Sjoerg return Tok.getEndLoc();
223*e038c9c4Sjoerg break;
224*e038c9c4Sjoerg case tok::comment:
225*e038c9c4Sjoerg // Include any comments we find on the way.
226*e038c9c4Sjoerg End = Tok.getEndLoc();
227*e038c9c4Sjoerg break;
228*e038c9c4Sjoerg case tok::semi:
229*e038c9c4Sjoerg case tok::comma:
230*e038c9c4Sjoerg if (TerminatedByMacro && contains(Terminators, Tok)) {
231*e038c9c4Sjoerg End = Tok.getEndLoc();
232*e038c9c4Sjoerg // We've found a real terminator.
233*e038c9c4Sjoerg TerminatedByMacro = false;
234*e038c9c4Sjoerg break;
235*e038c9c4Sjoerg }
236*e038c9c4Sjoerg // Found an unrelated token; stop and don't include it.
237*e038c9c4Sjoerg return End;
238*e038c9c4Sjoerg default:
239*e038c9c4Sjoerg // Found an unrelated token; stop and don't include it.
240*e038c9c4Sjoerg return End;
241*e038c9c4Sjoerg }
242*e038c9c4Sjoerg } while (true);
243*e038c9c4Sjoerg }
244*e038c9c4Sjoerg
245*e038c9c4Sjoerg // Returns the expected terminator tokens for the given declaration.
246*e038c9c4Sjoerg //
247*e038c9c4Sjoerg // If we do not know the correct terminator token, returns an empty set.
248*e038c9c4Sjoerg //
249*e038c9c4Sjoerg // There are cases where we have more than one possible terminator (for example,
250*e038c9c4Sjoerg // we find either a comma or a semicolon after a VarDecl).
getTerminators(const Decl & D)251*e038c9c4Sjoerg static std::set<tok::TokenKind> getTerminators(const Decl &D) {
252*e038c9c4Sjoerg if (llvm::isa<RecordDecl>(D) || llvm::isa<UsingDecl>(D))
253*e038c9c4Sjoerg return {tok::semi};
254*e038c9c4Sjoerg
255*e038c9c4Sjoerg if (llvm::isa<FunctionDecl>(D) || llvm::isa<LinkageSpecDecl>(D))
256*e038c9c4Sjoerg return {tok::r_brace, tok::semi};
257*e038c9c4Sjoerg
258*e038c9c4Sjoerg if (llvm::isa<VarDecl>(D) || llvm::isa<FieldDecl>(D))
259*e038c9c4Sjoerg return {tok::comma, tok::semi};
260*e038c9c4Sjoerg
261*e038c9c4Sjoerg return {};
262*e038c9c4Sjoerg }
263*e038c9c4Sjoerg
264*e038c9c4Sjoerg // Starting from `Loc`, skips whitespace up to, and including, a single
265*e038c9c4Sjoerg // newline. Returns the (exclusive) end of any skipped whitespace (that is, the
266*e038c9c4Sjoerg // location immediately after the whitespace).
skipWhitespaceAndNewline(const SourceManager & SM,SourceLocation Loc,const LangOptions & LangOpts)267*e038c9c4Sjoerg static SourceLocation skipWhitespaceAndNewline(const SourceManager &SM,
268*e038c9c4Sjoerg SourceLocation Loc,
269*e038c9c4Sjoerg const LangOptions &LangOpts) {
270*e038c9c4Sjoerg const char *LocChars = SM.getCharacterData(Loc);
271*e038c9c4Sjoerg int i = 0;
272*e038c9c4Sjoerg while (isHorizontalWhitespace(LocChars[i]))
273*e038c9c4Sjoerg ++i;
274*e038c9c4Sjoerg if (isVerticalWhitespace(LocChars[i]))
275*e038c9c4Sjoerg ++i;
276*e038c9c4Sjoerg return Loc.getLocWithOffset(i);
277*e038c9c4Sjoerg }
278*e038c9c4Sjoerg
279*e038c9c4Sjoerg // Is `Loc` separated from any following decl by something meaningful (e.g. an
280*e038c9c4Sjoerg // empty line, a comment), ignoring horizontal whitespace? Since this is a
281*e038c9c4Sjoerg // heuristic, we return false when in doubt. `Loc` cannot be the first location
282*e038c9c4Sjoerg // in the file.
atOrBeforeSeparation(const SourceManager & SM,SourceLocation Loc,const LangOptions & LangOpts)283*e038c9c4Sjoerg static bool atOrBeforeSeparation(const SourceManager &SM, SourceLocation Loc,
284*e038c9c4Sjoerg const LangOptions &LangOpts) {
285*e038c9c4Sjoerg // If the preceding character is a newline, we'll check for an empty line as a
286*e038c9c4Sjoerg // separator. However, we can't identify an empty line using tokens, so we
287*e038c9c4Sjoerg // analyse the characters. If we try to use tokens, we'll just end up with a
288*e038c9c4Sjoerg // whitespace token, whose characters we'd have to analyse anyhow.
289*e038c9c4Sjoerg bool Invalid = false;
290*e038c9c4Sjoerg const char *LocChars =
291*e038c9c4Sjoerg SM.getCharacterData(Loc.getLocWithOffset(-1), &Invalid);
292*e038c9c4Sjoerg assert(!Invalid &&
293*e038c9c4Sjoerg "Loc must be a valid character and not the first of the source file.");
294*e038c9c4Sjoerg if (isVerticalWhitespace(LocChars[0])) {
295*e038c9c4Sjoerg for (int i = 1; isWhitespace(LocChars[i]); ++i)
296*e038c9c4Sjoerg if (isVerticalWhitespace(LocChars[i]))
297*e038c9c4Sjoerg return true;
298*e038c9c4Sjoerg }
299*e038c9c4Sjoerg // We didn't find an empty line, so lex the next token, skipping past any
300*e038c9c4Sjoerg // whitespace we just scanned.
301*e038c9c4Sjoerg Token Tok;
302*e038c9c4Sjoerg bool Failed = Lexer::getRawToken(Loc, Tok, SM, LangOpts,
303*e038c9c4Sjoerg /*IgnoreWhiteSpace=*/true);
304*e038c9c4Sjoerg if (Failed)
305*e038c9c4Sjoerg // Any text that confuses the lexer seems fair to consider a separation.
306*e038c9c4Sjoerg return true;
307*e038c9c4Sjoerg
308*e038c9c4Sjoerg switch (Tok.getKind()) {
309*e038c9c4Sjoerg case tok::comment:
310*e038c9c4Sjoerg case tok::l_brace:
311*e038c9c4Sjoerg case tok::r_brace:
312*e038c9c4Sjoerg case tok::eof:
313*e038c9c4Sjoerg return true;
314*e038c9c4Sjoerg default:
315*e038c9c4Sjoerg return false;
316*e038c9c4Sjoerg }
317*e038c9c4Sjoerg }
318*e038c9c4Sjoerg
getAssociatedRange(const Decl & Decl,ASTContext & Context)319*e038c9c4Sjoerg CharSourceRange tooling::getAssociatedRange(const Decl &Decl,
320*e038c9c4Sjoerg ASTContext &Context) {
321*e038c9c4Sjoerg const SourceManager &SM = Context.getSourceManager();
322*e038c9c4Sjoerg const LangOptions &LangOpts = Context.getLangOpts();
323*e038c9c4Sjoerg CharSourceRange Range = CharSourceRange::getTokenRange(Decl.getSourceRange());
324*e038c9c4Sjoerg
325*e038c9c4Sjoerg // First, expand to the start of the template<> declaration if necessary.
326*e038c9c4Sjoerg if (const auto *Record = llvm::dyn_cast<CXXRecordDecl>(&Decl)) {
327*e038c9c4Sjoerg if (const auto *T = Record->getDescribedClassTemplate())
328*e038c9c4Sjoerg if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
329*e038c9c4Sjoerg Range.setBegin(T->getBeginLoc());
330*e038c9c4Sjoerg } else if (const auto *F = llvm::dyn_cast<FunctionDecl>(&Decl)) {
331*e038c9c4Sjoerg if (const auto *T = F->getDescribedFunctionTemplate())
332*e038c9c4Sjoerg if (SM.isBeforeInTranslationUnit(T->getBeginLoc(), Range.getBegin()))
333*e038c9c4Sjoerg Range.setBegin(T->getBeginLoc());
334*e038c9c4Sjoerg }
335*e038c9c4Sjoerg
336*e038c9c4Sjoerg // Next, expand the end location past trailing comments to include a potential
337*e038c9c4Sjoerg // newline at the end of the decl's line.
338*e038c9c4Sjoerg Range.setEnd(
339*e038c9c4Sjoerg getEntityEndLoc(SM, Decl.getEndLoc(), getTerminators(Decl), LangOpts));
340*e038c9c4Sjoerg Range.setTokenRange(false);
341*e038c9c4Sjoerg
342*e038c9c4Sjoerg // Expand to include preceeding associated comments. We ignore any comments
343*e038c9c4Sjoerg // that are not preceeding the decl, since we've already skipped trailing
344*e038c9c4Sjoerg // comments with getEntityEndLoc.
345*e038c9c4Sjoerg if (const RawComment *Comment =
346*e038c9c4Sjoerg Decl.getASTContext().getRawCommentForDeclNoCache(&Decl))
347*e038c9c4Sjoerg // Only include a preceding comment if:
348*e038c9c4Sjoerg // * it is *not* separate from the declaration (not including any newline
349*e038c9c4Sjoerg // that immediately follows the comment),
350*e038c9c4Sjoerg // * the decl *is* separate from any following entity (so, there are no
351*e038c9c4Sjoerg // other entities the comment could refer to), and
352*e038c9c4Sjoerg // * it is not a IfThisThenThat lint check.
353*e038c9c4Sjoerg if (SM.isBeforeInTranslationUnit(Comment->getBeginLoc(),
354*e038c9c4Sjoerg Range.getBegin()) &&
355*e038c9c4Sjoerg !atOrBeforeSeparation(
356*e038c9c4Sjoerg SM, skipWhitespaceAndNewline(SM, Comment->getEndLoc(), LangOpts),
357*e038c9c4Sjoerg LangOpts) &&
358*e038c9c4Sjoerg atOrBeforeSeparation(SM, Range.getEnd(), LangOpts)) {
359*e038c9c4Sjoerg const StringRef CommentText = Comment->getRawText(SM);
360*e038c9c4Sjoerg if (!CommentText.contains("LINT.IfChange") &&
361*e038c9c4Sjoerg !CommentText.contains("LINT.ThenChange"))
362*e038c9c4Sjoerg Range.setBegin(Comment->getBeginLoc());
363*e038c9c4Sjoerg }
364*e038c9c4Sjoerg // Add leading attributes.
365*e038c9c4Sjoerg for (auto *Attr : Decl.attrs()) {
366*e038c9c4Sjoerg if (Attr->getLocation().isInvalid() ||
367*e038c9c4Sjoerg !SM.isBeforeInTranslationUnit(Attr->getLocation(), Range.getBegin()))
368*e038c9c4Sjoerg continue;
369*e038c9c4Sjoerg Range.setBegin(Attr->getLocation());
370*e038c9c4Sjoerg
371*e038c9c4Sjoerg // Extend to the left '[[' or '__attribute((' if we saw the attribute,
372*e038c9c4Sjoerg // unless it is not a valid location.
373*e038c9c4Sjoerg bool Invalid;
374*e038c9c4Sjoerg StringRef Source =
375*e038c9c4Sjoerg SM.getBufferData(SM.getFileID(Range.getBegin()), &Invalid);
376*e038c9c4Sjoerg if (Invalid)
377*e038c9c4Sjoerg continue;
378*e038c9c4Sjoerg llvm::StringRef BeforeAttr =
379*e038c9c4Sjoerg Source.substr(0, SM.getFileOffset(Range.getBegin()));
380*e038c9c4Sjoerg llvm::StringRef BeforeAttrStripped = BeforeAttr.rtrim();
381*e038c9c4Sjoerg
382*e038c9c4Sjoerg for (llvm::StringRef Prefix : {"[[", "__attribute__(("}) {
383*e038c9c4Sjoerg // Handle whitespace between attribute prefix and attribute value.
384*e038c9c4Sjoerg if (BeforeAttrStripped.endswith(Prefix)) {
385*e038c9c4Sjoerg // Move start to start position of prefix, which is
386*e038c9c4Sjoerg // length(BeforeAttr) - length(BeforeAttrStripped) + length(Prefix)
387*e038c9c4Sjoerg // positions to the left.
388*e038c9c4Sjoerg Range.setBegin(Range.getBegin().getLocWithOffset(static_cast<int>(
389*e038c9c4Sjoerg -BeforeAttr.size() + BeforeAttrStripped.size() - Prefix.size())));
390*e038c9c4Sjoerg break;
391*e038c9c4Sjoerg // If we didn't see '[[' or '__attribute' it's probably coming from a
392*e038c9c4Sjoerg // macro expansion which is already handled by makeFileCharRange(),
393*e038c9c4Sjoerg // below.
394*e038c9c4Sjoerg }
395*e038c9c4Sjoerg }
396*e038c9c4Sjoerg }
397*e038c9c4Sjoerg
398*e038c9c4Sjoerg // Range.getEnd() is already fully un-expanded by getEntityEndLoc. But,
399*e038c9c4Sjoerg // Range.getBegin() may be inside an expansion.
400*e038c9c4Sjoerg return Lexer::makeFileCharRange(Range, SM, LangOpts);
4017330f729Sjoerg }
402