1e5dd7070Spatrick //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick
9e5dd7070Spatrick #include "clang/Tooling/Transformer/RangeSelector.h"
10e5dd7070Spatrick #include "clang/AST/Expr.h"
11*a9ac8606Spatrick #include "clang/AST/TypeLoc.h"
12e5dd7070Spatrick #include "clang/ASTMatchers/ASTMatchFinder.h"
13e5dd7070Spatrick #include "clang/Basic/SourceLocation.h"
14e5dd7070Spatrick #include "clang/Lex/Lexer.h"
15e5dd7070Spatrick #include "clang/Tooling/Transformer/SourceCode.h"
16e5dd7070Spatrick #include "llvm/ADT/StringRef.h"
17e5dd7070Spatrick #include "llvm/Support/Errc.h"
18e5dd7070Spatrick #include "llvm/Support/Error.h"
19e5dd7070Spatrick #include <string>
20e5dd7070Spatrick #include <utility>
21e5dd7070Spatrick #include <vector>
22e5dd7070Spatrick
23e5dd7070Spatrick using namespace clang;
24e5dd7070Spatrick using namespace transformer;
25e5dd7070Spatrick
26e5dd7070Spatrick using ast_matchers::MatchFinder;
27e5dd7070Spatrick using llvm::Error;
28e5dd7070Spatrick using llvm::StringError;
29e5dd7070Spatrick
30e5dd7070Spatrick using MatchResult = MatchFinder::MatchResult;
31e5dd7070Spatrick
invalidArgumentError(Twine Message)32e5dd7070Spatrick static Error invalidArgumentError(Twine Message) {
33e5dd7070Spatrick return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
34e5dd7070Spatrick }
35e5dd7070Spatrick
typeError(StringRef ID,const ASTNodeKind & Kind)36e5dd7070Spatrick static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
37e5dd7070Spatrick return invalidArgumentError("mismatched type (node id=" + ID +
38e5dd7070Spatrick " kind=" + Kind.asStringRef() + ")");
39e5dd7070Spatrick }
40e5dd7070Spatrick
typeError(StringRef ID,const ASTNodeKind & Kind,Twine ExpectedType)41e5dd7070Spatrick static Error typeError(StringRef ID, const ASTNodeKind &Kind,
42e5dd7070Spatrick Twine ExpectedType) {
43e5dd7070Spatrick return invalidArgumentError("mismatched type: expected one of " +
44e5dd7070Spatrick ExpectedType + " (node id=" + ID +
45e5dd7070Spatrick " kind=" + Kind.asStringRef() + ")");
46e5dd7070Spatrick }
47e5dd7070Spatrick
missingPropertyError(StringRef ID,Twine Description,StringRef Property)48e5dd7070Spatrick static Error missingPropertyError(StringRef ID, Twine Description,
49e5dd7070Spatrick StringRef Property) {
50e5dd7070Spatrick return invalidArgumentError(Description + " requires property '" + Property +
51e5dd7070Spatrick "' (node id=" + ID + ")");
52e5dd7070Spatrick }
53e5dd7070Spatrick
getNode(const ast_matchers::BoundNodes & Nodes,StringRef ID)54e5dd7070Spatrick static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
55e5dd7070Spatrick StringRef ID) {
56e5dd7070Spatrick auto &NodesMap = Nodes.getMap();
57e5dd7070Spatrick auto It = NodesMap.find(ID);
58e5dd7070Spatrick if (It == NodesMap.end())
59e5dd7070Spatrick return invalidArgumentError("ID not bound: " + ID);
60e5dd7070Spatrick return It->second;
61e5dd7070Spatrick }
62e5dd7070Spatrick
63e5dd7070Spatrick // FIXME: handling of macros should be configurable.
findPreviousTokenStart(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts)64e5dd7070Spatrick static SourceLocation findPreviousTokenStart(SourceLocation Start,
65e5dd7070Spatrick const SourceManager &SM,
66e5dd7070Spatrick const LangOptions &LangOpts) {
67e5dd7070Spatrick if (Start.isInvalid() || Start.isMacroID())
68e5dd7070Spatrick return SourceLocation();
69e5dd7070Spatrick
70e5dd7070Spatrick SourceLocation BeforeStart = Start.getLocWithOffset(-1);
71e5dd7070Spatrick if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
72e5dd7070Spatrick return SourceLocation();
73e5dd7070Spatrick
74e5dd7070Spatrick return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
75e5dd7070Spatrick }
76e5dd7070Spatrick
77e5dd7070Spatrick // Finds the start location of the previous token of kind \p TK.
78e5dd7070Spatrick // FIXME: handling of macros should be configurable.
findPreviousTokenKind(SourceLocation Start,const SourceManager & SM,const LangOptions & LangOpts,tok::TokenKind TK)79e5dd7070Spatrick static SourceLocation findPreviousTokenKind(SourceLocation Start,
80e5dd7070Spatrick const SourceManager &SM,
81e5dd7070Spatrick const LangOptions &LangOpts,
82e5dd7070Spatrick tok::TokenKind TK) {
83e5dd7070Spatrick while (true) {
84e5dd7070Spatrick SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
85e5dd7070Spatrick if (L.isInvalid() || L.isMacroID())
86e5dd7070Spatrick return SourceLocation();
87e5dd7070Spatrick
88e5dd7070Spatrick Token T;
89e5dd7070Spatrick if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
90e5dd7070Spatrick return SourceLocation();
91e5dd7070Spatrick
92e5dd7070Spatrick if (T.is(TK))
93e5dd7070Spatrick return T.getLocation();
94e5dd7070Spatrick
95e5dd7070Spatrick Start = L;
96e5dd7070Spatrick }
97e5dd7070Spatrick }
98e5dd7070Spatrick
findOpenParen(const CallExpr & E,const SourceManager & SM,const LangOptions & LangOpts)99e5dd7070Spatrick static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
100e5dd7070Spatrick const LangOptions &LangOpts) {
101e5dd7070Spatrick SourceLocation EndLoc =
102e5dd7070Spatrick E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
103e5dd7070Spatrick return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
104e5dd7070Spatrick }
105e5dd7070Spatrick
before(RangeSelector Selector)106e5dd7070Spatrick RangeSelector transformer::before(RangeSelector Selector) {
107e5dd7070Spatrick return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
108e5dd7070Spatrick Expected<CharSourceRange> SelectedRange = Selector(Result);
109e5dd7070Spatrick if (!SelectedRange)
110e5dd7070Spatrick return SelectedRange.takeError();
111e5dd7070Spatrick return CharSourceRange::getCharRange(SelectedRange->getBegin());
112e5dd7070Spatrick };
113e5dd7070Spatrick }
114e5dd7070Spatrick
after(RangeSelector Selector)115e5dd7070Spatrick RangeSelector transformer::after(RangeSelector Selector) {
116e5dd7070Spatrick return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
117e5dd7070Spatrick Expected<CharSourceRange> SelectedRange = Selector(Result);
118e5dd7070Spatrick if (!SelectedRange)
119e5dd7070Spatrick return SelectedRange.takeError();
120*a9ac8606Spatrick SourceLocation End = SelectedRange->getEnd();
121*a9ac8606Spatrick if (SelectedRange->isTokenRange()) {
122*a9ac8606Spatrick // We need to find the actual (exclusive) end location from which to
123*a9ac8606Spatrick // create a new source range. However, that's not guaranteed to be valid,
124*a9ac8606Spatrick // even if the token location itself is valid. So, we create a token range
125*a9ac8606Spatrick // consisting only of the last token, then map that range back to the
126*a9ac8606Spatrick // source file. If that succeeds, we have a valid location for the end of
127*a9ac8606Spatrick // the generated range.
128*a9ac8606Spatrick CharSourceRange Range = Lexer::makeFileCharRange(
129*a9ac8606Spatrick CharSourceRange::getTokenRange(SelectedRange->getEnd()),
130*a9ac8606Spatrick *Result.SourceManager, Result.Context->getLangOpts());
131*a9ac8606Spatrick if (Range.isInvalid())
132*a9ac8606Spatrick return invalidArgumentError(
133*a9ac8606Spatrick "after: can't resolve sub-range to valid source range");
134*a9ac8606Spatrick End = Range.getEnd();
135*a9ac8606Spatrick }
136*a9ac8606Spatrick
137*a9ac8606Spatrick return CharSourceRange::getCharRange(End);
138e5dd7070Spatrick };
139e5dd7070Spatrick }
140e5dd7070Spatrick
node(std::string ID)141e5dd7070Spatrick RangeSelector transformer::node(std::string ID) {
142e5dd7070Spatrick return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
143e5dd7070Spatrick Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
144e5dd7070Spatrick if (!Node)
145e5dd7070Spatrick return Node.takeError();
146*a9ac8606Spatrick return (Node->get<Decl>() != nullptr ||
147*a9ac8606Spatrick (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
148e5dd7070Spatrick ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
149e5dd7070Spatrick *Result.Context)
150e5dd7070Spatrick : CharSourceRange::getTokenRange(Node->getSourceRange());
151e5dd7070Spatrick };
152e5dd7070Spatrick }
153e5dd7070Spatrick
statement(std::string ID)154e5dd7070Spatrick RangeSelector transformer::statement(std::string ID) {
155e5dd7070Spatrick return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
156e5dd7070Spatrick Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
157e5dd7070Spatrick if (!Node)
158e5dd7070Spatrick return Node.takeError();
159e5dd7070Spatrick return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
160e5dd7070Spatrick *Result.Context);
161e5dd7070Spatrick };
162e5dd7070Spatrick }
163e5dd7070Spatrick
enclose(RangeSelector Begin,RangeSelector End)164ec727ea7Spatrick RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
165e5dd7070Spatrick return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
166e5dd7070Spatrick Expected<CharSourceRange> BeginRange = Begin(Result);
167e5dd7070Spatrick if (!BeginRange)
168e5dd7070Spatrick return BeginRange.takeError();
169e5dd7070Spatrick Expected<CharSourceRange> EndRange = End(Result);
170e5dd7070Spatrick if (!EndRange)
171e5dd7070Spatrick return EndRange.takeError();
172e5dd7070Spatrick SourceLocation B = BeginRange->getBegin();
173e5dd7070Spatrick SourceLocation E = EndRange->getEnd();
174e5dd7070Spatrick // Note: we are precluding the possibility of sub-token ranges in the case
175e5dd7070Spatrick // that EndRange is a token range.
176e5dd7070Spatrick if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
177e5dd7070Spatrick return invalidArgumentError("Bad range: out of order");
178e5dd7070Spatrick }
179e5dd7070Spatrick return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
180e5dd7070Spatrick };
181e5dd7070Spatrick }
182e5dd7070Spatrick
encloseNodes(std::string BeginID,std::string EndID)183ec727ea7Spatrick RangeSelector transformer::encloseNodes(std::string BeginID,
184ec727ea7Spatrick std::string EndID) {
185ec727ea7Spatrick return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
186e5dd7070Spatrick }
187e5dd7070Spatrick
member(std::string ID)188e5dd7070Spatrick RangeSelector transformer::member(std::string ID) {
189e5dd7070Spatrick return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
190e5dd7070Spatrick Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
191e5dd7070Spatrick if (!Node)
192e5dd7070Spatrick return Node.takeError();
193e5dd7070Spatrick if (auto *M = Node->get<clang::MemberExpr>())
194e5dd7070Spatrick return CharSourceRange::getTokenRange(
195e5dd7070Spatrick M->getMemberNameInfo().getSourceRange());
196e5dd7070Spatrick return typeError(ID, Node->getNodeKind(), "MemberExpr");
197e5dd7070Spatrick };
198e5dd7070Spatrick }
199e5dd7070Spatrick
name(std::string ID)200e5dd7070Spatrick RangeSelector transformer::name(std::string ID) {
201e5dd7070Spatrick return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
202e5dd7070Spatrick Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
203e5dd7070Spatrick if (!N)
204e5dd7070Spatrick return N.takeError();
205e5dd7070Spatrick auto &Node = *N;
206e5dd7070Spatrick if (const auto *D = Node.get<NamedDecl>()) {
207e5dd7070Spatrick if (!D->getDeclName().isIdentifier())
208e5dd7070Spatrick return missingPropertyError(ID, "name", "identifier");
209e5dd7070Spatrick SourceLocation L = D->getLocation();
210e5dd7070Spatrick auto R = CharSourceRange::getTokenRange(L, L);
211e5dd7070Spatrick // Verify that the range covers exactly the name.
212e5dd7070Spatrick // FIXME: extend this code to support cases like `operator +` or
213e5dd7070Spatrick // `foo<int>` for which this range will be too short. Doing so will
214e5dd7070Spatrick // require subcasing `NamedDecl`, because it doesn't provide virtual
215e5dd7070Spatrick // access to the \c DeclarationNameInfo.
216e5dd7070Spatrick if (tooling::getText(R, *Result.Context) != D->getName())
217e5dd7070Spatrick return CharSourceRange();
218e5dd7070Spatrick return R;
219e5dd7070Spatrick }
220e5dd7070Spatrick if (const auto *E = Node.get<DeclRefExpr>()) {
221e5dd7070Spatrick if (!E->getNameInfo().getName().isIdentifier())
222e5dd7070Spatrick return missingPropertyError(ID, "name", "identifier");
223e5dd7070Spatrick SourceLocation L = E->getLocation();
224e5dd7070Spatrick return CharSourceRange::getTokenRange(L, L);
225e5dd7070Spatrick }
226e5dd7070Spatrick if (const auto *I = Node.get<CXXCtorInitializer>()) {
227e5dd7070Spatrick if (!I->isMemberInitializer() && I->isWritten())
228e5dd7070Spatrick return missingPropertyError(ID, "name", "explicit member initializer");
229e5dd7070Spatrick SourceLocation L = I->getMemberLocation();
230e5dd7070Spatrick return CharSourceRange::getTokenRange(L, L);
231e5dd7070Spatrick }
232*a9ac8606Spatrick if (const auto *T = Node.get<TypeLoc>()) {
233*a9ac8606Spatrick TypeLoc Loc = *T;
234*a9ac8606Spatrick auto ET = Loc.getAs<ElaboratedTypeLoc>();
235*a9ac8606Spatrick if (!ET.isNull()) {
236*a9ac8606Spatrick Loc = ET.getNamedTypeLoc();
237*a9ac8606Spatrick }
238*a9ac8606Spatrick return CharSourceRange::getTokenRange(Loc.getSourceRange());
239*a9ac8606Spatrick }
240e5dd7070Spatrick return typeError(ID, Node.getNodeKind(),
241*a9ac8606Spatrick "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
242e5dd7070Spatrick };
243e5dd7070Spatrick }
244e5dd7070Spatrick
245e5dd7070Spatrick namespace {
246e5dd7070Spatrick // FIXME: make this available in the public API for users to easily create their
247e5dd7070Spatrick // own selectors.
248e5dd7070Spatrick
249e5dd7070Spatrick // Creates a selector from a range-selection function \p Func, which selects a
250e5dd7070Spatrick // range that is relative to a bound node id. \c T is the node type expected by
251e5dd7070Spatrick // \p Func.
252e5dd7070Spatrick template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
253e5dd7070Spatrick class RelativeSelector {
254e5dd7070Spatrick std::string ID;
255e5dd7070Spatrick
256e5dd7070Spatrick public:
RelativeSelector(std::string ID)257e5dd7070Spatrick RelativeSelector(std::string ID) : ID(std::move(ID)) {}
258e5dd7070Spatrick
operator ()(const MatchResult & Result)259e5dd7070Spatrick Expected<CharSourceRange> operator()(const MatchResult &Result) {
260e5dd7070Spatrick Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
261e5dd7070Spatrick if (!N)
262e5dd7070Spatrick return N.takeError();
263e5dd7070Spatrick if (const auto *Arg = N->get<T>())
264e5dd7070Spatrick return Func(Result, *Arg);
265e5dd7070Spatrick return typeError(ID, N->getNodeKind());
266e5dd7070Spatrick }
267e5dd7070Spatrick };
268e5dd7070Spatrick } // namespace
269e5dd7070Spatrick
270e5dd7070Spatrick // FIXME: Change the following functions from being in an anonymous namespace
271e5dd7070Spatrick // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
272e5dd7070Spatrick // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
273e5dd7070Spatrick // namespace works around a bug in earlier versions.
274e5dd7070Spatrick namespace {
275e5dd7070Spatrick // Returns the range of the statements (all source between the braces).
getStatementsRange(const MatchResult &,const CompoundStmt & CS)276e5dd7070Spatrick CharSourceRange getStatementsRange(const MatchResult &,
277e5dd7070Spatrick const CompoundStmt &CS) {
278e5dd7070Spatrick return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
279e5dd7070Spatrick CS.getRBracLoc());
280e5dd7070Spatrick }
281e5dd7070Spatrick } // namespace
282e5dd7070Spatrick
statements(std::string ID)283e5dd7070Spatrick RangeSelector transformer::statements(std::string ID) {
284e5dd7070Spatrick return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
285e5dd7070Spatrick }
286e5dd7070Spatrick
287e5dd7070Spatrick namespace {
288e5dd7070Spatrick // Returns the range of the source between the call's parentheses.
getCallArgumentsRange(const MatchResult & Result,const CallExpr & CE)289e5dd7070Spatrick CharSourceRange getCallArgumentsRange(const MatchResult &Result,
290e5dd7070Spatrick const CallExpr &CE) {
291e5dd7070Spatrick return CharSourceRange::getCharRange(
292e5dd7070Spatrick findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
293e5dd7070Spatrick .getLocWithOffset(1),
294e5dd7070Spatrick CE.getRParenLoc());
295e5dd7070Spatrick }
296e5dd7070Spatrick } // namespace
297e5dd7070Spatrick
callArgs(std::string ID)298e5dd7070Spatrick RangeSelector transformer::callArgs(std::string ID) {
299e5dd7070Spatrick return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
300e5dd7070Spatrick }
301e5dd7070Spatrick
302e5dd7070Spatrick namespace {
303e5dd7070Spatrick // Returns the range of the elements of the initializer list. Includes all
304e5dd7070Spatrick // source between the braces.
getElementsRange(const MatchResult &,const InitListExpr & E)305e5dd7070Spatrick CharSourceRange getElementsRange(const MatchResult &,
306e5dd7070Spatrick const InitListExpr &E) {
307e5dd7070Spatrick return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
308e5dd7070Spatrick E.getRBraceLoc());
309e5dd7070Spatrick }
310e5dd7070Spatrick } // namespace
311e5dd7070Spatrick
initListElements(std::string ID)312e5dd7070Spatrick RangeSelector transformer::initListElements(std::string ID) {
313e5dd7070Spatrick return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
314e5dd7070Spatrick }
315e5dd7070Spatrick
316e5dd7070Spatrick namespace {
317e5dd7070Spatrick // Returns the range of the else branch, including the `else` keyword.
getElseRange(const MatchResult & Result,const IfStmt & S)318e5dd7070Spatrick CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
319e5dd7070Spatrick return tooling::maybeExtendRange(
320e5dd7070Spatrick CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
321e5dd7070Spatrick tok::TokenKind::semi, *Result.Context);
322e5dd7070Spatrick }
323e5dd7070Spatrick } // namespace
324e5dd7070Spatrick
elseBranch(std::string ID)325e5dd7070Spatrick RangeSelector transformer::elseBranch(std::string ID) {
326e5dd7070Spatrick return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
327e5dd7070Spatrick }
328e5dd7070Spatrick
expansion(RangeSelector S)329e5dd7070Spatrick RangeSelector transformer::expansion(RangeSelector S) {
330e5dd7070Spatrick return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
331e5dd7070Spatrick Expected<CharSourceRange> SRange = S(Result);
332e5dd7070Spatrick if (!SRange)
333e5dd7070Spatrick return SRange.takeError();
334e5dd7070Spatrick return Result.SourceManager->getExpansionRange(*SRange);
335e5dd7070Spatrick };
336e5dd7070Spatrick }
337