xref: /freebsd-src/contrib/llvm-project/clang/lib/Tooling/Transformer/RangeSelector.cpp (revision 6e4dbb7feff42275c3025dbc25d6ea6cce4e5fd7)
1 //===--- RangeSelector.cpp - RangeSelector implementations ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Transformer/RangeSelector.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/AST/TypeLoc.h"
12 #include "clang/ASTMatchers/ASTMatchFinder.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "clang/Lex/Lexer.h"
15 #include "clang/Tooling/Transformer/SourceCode.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Errc.h"
18 #include "llvm/Support/Error.h"
19 #include <string>
20 #include <utility>
21 #include <vector>
22 
23 using namespace clang;
24 using namespace transformer;
25 
26 using ast_matchers::MatchFinder;
27 using llvm::Error;
28 using llvm::StringError;
29 
30 using MatchResult = MatchFinder::MatchResult;
31 
32 static Error invalidArgumentError(Twine Message) {
33   return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
34 }
35 
36 static Error typeError(StringRef ID, const ASTNodeKind &Kind) {
37   return invalidArgumentError("mismatched type (node id=" + ID +
38                               " kind=" + Kind.asStringRef() + ")");
39 }
40 
41 static Error typeError(StringRef ID, const ASTNodeKind &Kind,
42                        Twine ExpectedType) {
43   return invalidArgumentError("mismatched type: expected one of " +
44                               ExpectedType + " (node id=" + ID +
45                               " kind=" + Kind.asStringRef() + ")");
46 }
47 
48 static Error missingPropertyError(StringRef ID, Twine Description,
49                                   StringRef Property) {
50   return invalidArgumentError(Description + " requires property '" + Property +
51                               "' (node id=" + ID + ")");
52 }
53 
54 static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
55                                       StringRef ID) {
56   auto &NodesMap = Nodes.getMap();
57   auto It = NodesMap.find(ID);
58   if (It == NodesMap.end())
59     return invalidArgumentError("ID not bound: " + ID);
60   return It->second;
61 }
62 
63 // FIXME: handling of macros should be configurable.
64 static SourceLocation findPreviousTokenStart(SourceLocation Start,
65                                              const SourceManager &SM,
66                                              const LangOptions &LangOpts) {
67   if (Start.isInvalid() || Start.isMacroID())
68     return SourceLocation();
69 
70   SourceLocation BeforeStart = Start.getLocWithOffset(-1);
71   if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
72     return SourceLocation();
73 
74   return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
75 }
76 
77 // Finds the start location of the previous token of kind \p TK.
78 // FIXME: handling of macros should be configurable.
79 static SourceLocation findPreviousTokenKind(SourceLocation Start,
80                                             const SourceManager &SM,
81                                             const LangOptions &LangOpts,
82                                             tok::TokenKind TK) {
83   while (true) {
84     SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
85     if (L.isInvalid() || L.isMacroID())
86       return SourceLocation();
87 
88     Token T;
89     if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
90       return SourceLocation();
91 
92     if (T.is(TK))
93       return T.getLocation();
94 
95     Start = L;
96   }
97 }
98 
99 static SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
100                                     const LangOptions &LangOpts) {
101   SourceLocation EndLoc =
102       E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
103   return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
104 }
105 
106 RangeSelector transformer::before(RangeSelector Selector) {
107   return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
108     Expected<CharSourceRange> SelectedRange = Selector(Result);
109     if (!SelectedRange)
110       return SelectedRange.takeError();
111     return CharSourceRange::getCharRange(SelectedRange->getBegin());
112   };
113 }
114 
115 RangeSelector transformer::after(RangeSelector Selector) {
116   return [Selector](const MatchResult &Result) -> Expected<CharSourceRange> {
117     Expected<CharSourceRange> SelectedRange = Selector(Result);
118     if (!SelectedRange)
119       return SelectedRange.takeError();
120     SourceLocation End = SelectedRange->getEnd();
121     if (SelectedRange->isTokenRange()) {
122       // We need to find the actual (exclusive) end location from which to
123       // create a new source range. However, that's not guaranteed to be valid,
124       // even if the token location itself is valid. So, we create a token range
125       // consisting only of the last token, then map that range back to the
126       // source file. If that succeeds, we have a valid location for the end of
127       // the generated range.
128       CharSourceRange Range = Lexer::makeFileCharRange(
129           CharSourceRange::getTokenRange(SelectedRange->getEnd()),
130           *Result.SourceManager, Result.Context->getLangOpts());
131       if (Range.isInvalid())
132         return invalidArgumentError(
133             "after: can't resolve sub-range to valid source range");
134       End = Range.getEnd();
135     }
136 
137     return CharSourceRange::getCharRange(End);
138   };
139 }
140 
141 RangeSelector transformer::node(std::string ID) {
142   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
143     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
144     if (!Node)
145       return Node.takeError();
146     return (Node->get<Decl>() != nullptr ||
147             (Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr))
148                ? tooling::getExtendedRange(*Node, tok::TokenKind::semi,
149                                            *Result.Context)
150                : CharSourceRange::getTokenRange(Node->getSourceRange());
151   };
152 }
153 
154 RangeSelector transformer::statement(std::string ID) {
155   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
156     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
157     if (!Node)
158       return Node.takeError();
159     return tooling::getExtendedRange(*Node, tok::TokenKind::semi,
160                                      *Result.Context);
161   };
162 }
163 
164 RangeSelector transformer::enclose(RangeSelector Begin, RangeSelector End) {
165   return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
166     Expected<CharSourceRange> BeginRange = Begin(Result);
167     if (!BeginRange)
168       return BeginRange.takeError();
169     Expected<CharSourceRange> EndRange = End(Result);
170     if (!EndRange)
171       return EndRange.takeError();
172     SourceLocation B = BeginRange->getBegin();
173     SourceLocation E = EndRange->getEnd();
174     // Note: we are precluding the possibility of sub-token ranges in the case
175     // that EndRange is a token range.
176     if (Result.SourceManager->isBeforeInTranslationUnit(E, B)) {
177       return invalidArgumentError("Bad range: out of order");
178     }
179     return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
180   };
181 }
182 
183 RangeSelector transformer::encloseNodes(std::string BeginID,
184                                         std::string EndID) {
185   return transformer::enclose(node(std::move(BeginID)), node(std::move(EndID)));
186 }
187 
188 RangeSelector transformer::member(std::string ID) {
189   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
190     Expected<DynTypedNode> Node = getNode(Result.Nodes, ID);
191     if (!Node)
192       return Node.takeError();
193     if (auto *M = Node->get<clang::MemberExpr>())
194       return CharSourceRange::getTokenRange(
195           M->getMemberNameInfo().getSourceRange());
196     return typeError(ID, Node->getNodeKind(), "MemberExpr");
197   };
198 }
199 
200 RangeSelector transformer::name(std::string ID) {
201   return [ID](const MatchResult &Result) -> Expected<CharSourceRange> {
202     Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
203     if (!N)
204       return N.takeError();
205     auto &Node = *N;
206     if (const auto *D = Node.get<NamedDecl>()) {
207       if (!D->getDeclName().isIdentifier())
208         return missingPropertyError(ID, "name", "identifier");
209       SourceLocation L = D->getLocation();
210       auto R = CharSourceRange::getTokenRange(L, L);
211       // Verify that the range covers exactly the name.
212       // FIXME: extend this code to support cases like `operator +` or
213       // `foo<int>` for which this range will be too short.  Doing so will
214       // require subcasing `NamedDecl`, because it doesn't provide virtual
215       // access to the \c DeclarationNameInfo.
216       if (tooling::getText(R, *Result.Context) != D->getName())
217         return CharSourceRange();
218       return R;
219     }
220     if (const auto *E = Node.get<DeclRefExpr>()) {
221       if (!E->getNameInfo().getName().isIdentifier())
222         return missingPropertyError(ID, "name", "identifier");
223       SourceLocation L = E->getLocation();
224       return CharSourceRange::getTokenRange(L, L);
225     }
226     if (const auto *I = Node.get<CXXCtorInitializer>()) {
227       if (!I->isMemberInitializer() && I->isWritten())
228         return missingPropertyError(ID, "name", "explicit member initializer");
229       SourceLocation L = I->getMemberLocation();
230       return CharSourceRange::getTokenRange(L, L);
231     }
232     if (const auto *T = Node.get<TypeLoc>()) {
233       TypeLoc Loc = *T;
234       auto ET = Loc.getAs<ElaboratedTypeLoc>();
235       if (!ET.isNull()) {
236         Loc = ET.getNamedTypeLoc();
237       }
238       return CharSourceRange::getTokenRange(Loc.getSourceRange());
239     }
240     return typeError(ID, Node.getNodeKind(),
241                      "DeclRefExpr, NamedDecl, CXXCtorInitializer, TypeLoc");
242   };
243 }
244 
245 namespace {
246 // FIXME: make this available in the public API for users to easily create their
247 // own selectors.
248 
249 // Creates a selector from a range-selection function \p Func, which selects a
250 // range that is relative to a bound node id.  \c T is the node type expected by
251 // \p Func.
252 template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
253 class RelativeSelector {
254   std::string ID;
255 
256 public:
257   RelativeSelector(std::string ID) : ID(std::move(ID)) {}
258 
259   Expected<CharSourceRange> operator()(const MatchResult &Result) {
260     Expected<DynTypedNode> N = getNode(Result.Nodes, ID);
261     if (!N)
262       return N.takeError();
263     if (const auto *Arg = N->get<T>())
264       return Func(Result, *Arg);
265     return typeError(ID, N->getNodeKind());
266   }
267 };
268 } // namespace
269 
270 // FIXME: Change the following functions from being in an anonymous namespace
271 // to static functions, after the minimum Visual C++ has _MSC_VER >= 1915
272 // (equivalent to Visual Studio 2017 v15.8 or higher). Using the anonymous
273 // namespace works around a bug in earlier versions.
274 namespace {
275 // Returns the range of the statements (all source between the braces).
276 CharSourceRange getStatementsRange(const MatchResult &,
277                                    const CompoundStmt &CS) {
278   return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
279                                        CS.getRBracLoc());
280 }
281 } // namespace
282 
283 RangeSelector transformer::statements(std::string ID) {
284   return RelativeSelector<CompoundStmt, getStatementsRange>(std::move(ID));
285 }
286 
287 namespace {
288 // Returns the range of the source between the call's parentheses.
289 CharSourceRange getCallArgumentsRange(const MatchResult &Result,
290                                       const CallExpr &CE) {
291   return CharSourceRange::getCharRange(
292       findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
293           .getLocWithOffset(1),
294       CE.getRParenLoc());
295 }
296 } // namespace
297 
298 RangeSelector transformer::callArgs(std::string ID) {
299   return RelativeSelector<CallExpr, getCallArgumentsRange>(std::move(ID));
300 }
301 
302 namespace {
303 // Returns the range of the elements of the initializer list. Includes all
304 // source between the braces.
305 CharSourceRange getElementsRange(const MatchResult &,
306                                  const InitListExpr &E) {
307   return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
308                                        E.getRBraceLoc());
309 }
310 } // namespace
311 
312 RangeSelector transformer::initListElements(std::string ID) {
313   return RelativeSelector<InitListExpr, getElementsRange>(std::move(ID));
314 }
315 
316 namespace {
317 // Returns the range of the else branch, including the `else` keyword.
318 CharSourceRange getElseRange(const MatchResult &Result, const IfStmt &S) {
319   return tooling::maybeExtendRange(
320       CharSourceRange::getTokenRange(S.getElseLoc(), S.getEndLoc()),
321       tok::TokenKind::semi, *Result.Context);
322 }
323 } // namespace
324 
325 RangeSelector transformer::elseBranch(std::string ID) {
326   return RelativeSelector<IfStmt, getElseRange>(std::move(ID));
327 }
328 
329 RangeSelector transformer::expansion(RangeSelector S) {
330   return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
331     Expected<CharSourceRange> SRange = S(Result);
332     if (!SRange)
333       return SRange.takeError();
334     return Result.SourceManager->getExpansionRange(*SRange);
335   };
336 }
337