1*e038c9c4Sjoerg //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2*e038c9c4Sjoerg //
3*e038c9c4Sjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e038c9c4Sjoerg // See https://llvm.org/LICENSE.txt for license information.
5*e038c9c4Sjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*e038c9c4Sjoerg //
7*e038c9c4Sjoerg //===----------------------------------------------------------------------===//
8*e038c9c4Sjoerg
9*e038c9c4Sjoerg #include "clang/Tooling/Transformer/Parsing.h"
10*e038c9c4Sjoerg #include "clang/AST/Expr.h"
11*e038c9c4Sjoerg #include "clang/ASTMatchers/ASTMatchFinder.h"
12*e038c9c4Sjoerg #include "clang/Basic/CharInfo.h"
13*e038c9c4Sjoerg #include "clang/Basic/SourceLocation.h"
14*e038c9c4Sjoerg #include "clang/Lex/Lexer.h"
15*e038c9c4Sjoerg #include "clang/Tooling/Transformer/RangeSelector.h"
16*e038c9c4Sjoerg #include "clang/Tooling/Transformer/SourceCode.h"
17*e038c9c4Sjoerg #include "llvm/ADT/None.h"
18*e038c9c4Sjoerg #include "llvm/ADT/StringMap.h"
19*e038c9c4Sjoerg #include "llvm/ADT/StringRef.h"
20*e038c9c4Sjoerg #include "llvm/Support/Errc.h"
21*e038c9c4Sjoerg #include "llvm/Support/Error.h"
22*e038c9c4Sjoerg #include <string>
23*e038c9c4Sjoerg #include <utility>
24*e038c9c4Sjoerg #include <vector>
25*e038c9c4Sjoerg
26*e038c9c4Sjoerg using namespace clang;
27*e038c9c4Sjoerg using namespace transformer;
28*e038c9c4Sjoerg
29*e038c9c4Sjoerg // FIXME: This implementation is entirely separate from that of the AST
30*e038c9c4Sjoerg // matchers. Given the similarity of the languages and uses of the two parsers,
31*e038c9c4Sjoerg // the two should share a common parsing infrastructure, as should other
32*e038c9c4Sjoerg // Transformer types. We intend to unify this implementation soon to share as
33*e038c9c4Sjoerg // much as possible with the AST Matchers parsing.
34*e038c9c4Sjoerg
35*e038c9c4Sjoerg namespace {
36*e038c9c4Sjoerg using llvm::Error;
37*e038c9c4Sjoerg using llvm::Expected;
38*e038c9c4Sjoerg
39*e038c9c4Sjoerg template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
40*e038c9c4Sjoerg
41*e038c9c4Sjoerg struct ParseState {
42*e038c9c4Sjoerg // The remaining input to be processed.
43*e038c9c4Sjoerg StringRef Input;
44*e038c9c4Sjoerg // The original input. Not modified during parsing; only for reference in
45*e038c9c4Sjoerg // error reporting.
46*e038c9c4Sjoerg StringRef OriginalInput;
47*e038c9c4Sjoerg };
48*e038c9c4Sjoerg
49*e038c9c4Sjoerg // Represents an intermediate result returned by a parsing function. Functions
50*e038c9c4Sjoerg // that don't generate values should use `llvm::None`
51*e038c9c4Sjoerg template <typename ResultType> struct ParseProgress {
52*e038c9c4Sjoerg ParseState State;
53*e038c9c4Sjoerg // Intermediate result generated by the Parser.
54*e038c9c4Sjoerg ResultType Value;
55*e038c9c4Sjoerg };
56*e038c9c4Sjoerg
57*e038c9c4Sjoerg template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
58*e038c9c4Sjoerg template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
59*e038c9c4Sjoerg
60*e038c9c4Sjoerg class ParseError : public llvm::ErrorInfo<ParseError> {
61*e038c9c4Sjoerg public:
62*e038c9c4Sjoerg // Required field for all ErrorInfo derivatives.
63*e038c9c4Sjoerg static char ID;
64*e038c9c4Sjoerg
ParseError(size_t Pos,std::string ErrorMsg,std::string InputExcerpt)65*e038c9c4Sjoerg ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
66*e038c9c4Sjoerg : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
67*e038c9c4Sjoerg Excerpt(std::move(InputExcerpt)) {}
68*e038c9c4Sjoerg
log(llvm::raw_ostream & OS) const69*e038c9c4Sjoerg void log(llvm::raw_ostream &OS) const override {
70*e038c9c4Sjoerg OS << "parse error at position (" << Pos << "): " << ErrorMsg
71*e038c9c4Sjoerg << ": " + Excerpt;
72*e038c9c4Sjoerg }
73*e038c9c4Sjoerg
convertToErrorCode() const74*e038c9c4Sjoerg std::error_code convertToErrorCode() const override {
75*e038c9c4Sjoerg return llvm::inconvertibleErrorCode();
76*e038c9c4Sjoerg }
77*e038c9c4Sjoerg
78*e038c9c4Sjoerg // Position of the error in the input string.
79*e038c9c4Sjoerg size_t Pos;
80*e038c9c4Sjoerg std::string ErrorMsg;
81*e038c9c4Sjoerg // Excerpt of the input starting at the error position.
82*e038c9c4Sjoerg std::string Excerpt;
83*e038c9c4Sjoerg };
84*e038c9c4Sjoerg
85*e038c9c4Sjoerg char ParseError::ID;
86*e038c9c4Sjoerg } // namespace
87*e038c9c4Sjoerg
88*e038c9c4Sjoerg static const llvm::StringMap<RangeSelectorOp<std::string>> &
getUnaryStringSelectors()89*e038c9c4Sjoerg getUnaryStringSelectors() {
90*e038c9c4Sjoerg static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
91*e038c9c4Sjoerg {"name", name},
92*e038c9c4Sjoerg {"node", node},
93*e038c9c4Sjoerg {"statement", statement},
94*e038c9c4Sjoerg {"statements", statements},
95*e038c9c4Sjoerg {"member", member},
96*e038c9c4Sjoerg {"callArgs", callArgs},
97*e038c9c4Sjoerg {"elseBranch", elseBranch},
98*e038c9c4Sjoerg {"initListElements", initListElements}};
99*e038c9c4Sjoerg return M;
100*e038c9c4Sjoerg }
101*e038c9c4Sjoerg
102*e038c9c4Sjoerg static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
getUnaryRangeSelectors()103*e038c9c4Sjoerg getUnaryRangeSelectors() {
104*e038c9c4Sjoerg static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
105*e038c9c4Sjoerg {"before", before}, {"after", after}, {"expansion", expansion}};
106*e038c9c4Sjoerg return M;
107*e038c9c4Sjoerg }
108*e038c9c4Sjoerg
109*e038c9c4Sjoerg static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
getBinaryStringSelectors()110*e038c9c4Sjoerg getBinaryStringSelectors() {
111*e038c9c4Sjoerg static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
112*e038c9c4Sjoerg {"encloseNodes", encloseNodes}};
113*e038c9c4Sjoerg return M;
114*e038c9c4Sjoerg }
115*e038c9c4Sjoerg
116*e038c9c4Sjoerg static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors()117*e038c9c4Sjoerg getBinaryRangeSelectors() {
118*e038c9c4Sjoerg static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
119*e038c9c4Sjoerg M = {{"enclose", enclose}, {"between", between}};
120*e038c9c4Sjoerg return M;
121*e038c9c4Sjoerg }
122*e038c9c4Sjoerg
123*e038c9c4Sjoerg template <typename Element>
findOptional(const llvm::StringMap<Element> & Map,llvm::StringRef Key)124*e038c9c4Sjoerg llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
125*e038c9c4Sjoerg llvm::StringRef Key) {
126*e038c9c4Sjoerg auto it = Map.find(Key);
127*e038c9c4Sjoerg if (it == Map.end())
128*e038c9c4Sjoerg return llvm::None;
129*e038c9c4Sjoerg return it->second;
130*e038c9c4Sjoerg }
131*e038c9c4Sjoerg
132*e038c9c4Sjoerg template <typename ResultType>
makeParseProgress(ParseState State,ResultType Result)133*e038c9c4Sjoerg ParseProgress<ResultType> makeParseProgress(ParseState State,
134*e038c9c4Sjoerg ResultType Result) {
135*e038c9c4Sjoerg return ParseProgress<ResultType>{State, std::move(Result)};
136*e038c9c4Sjoerg }
137*e038c9c4Sjoerg
makeParseError(const ParseState & S,std::string ErrorMsg)138*e038c9c4Sjoerg static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
139*e038c9c4Sjoerg size_t Pos = S.OriginalInput.size() - S.Input.size();
140*e038c9c4Sjoerg return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
141*e038c9c4Sjoerg S.OriginalInput.substr(Pos, 20).str());
142*e038c9c4Sjoerg }
143*e038c9c4Sjoerg
144*e038c9c4Sjoerg // Returns a new ParseState that advances \c S by \c N characters.
advance(ParseState S,size_t N)145*e038c9c4Sjoerg static ParseState advance(ParseState S, size_t N) {
146*e038c9c4Sjoerg S.Input = S.Input.drop_front(N);
147*e038c9c4Sjoerg return S;
148*e038c9c4Sjoerg }
149*e038c9c4Sjoerg
consumeWhitespace(StringRef S)150*e038c9c4Sjoerg static StringRef consumeWhitespace(StringRef S) {
151*e038c9c4Sjoerg return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
152*e038c9c4Sjoerg }
153*e038c9c4Sjoerg
154*e038c9c4Sjoerg // Parses a single expected character \c c from \c State, skipping preceding
155*e038c9c4Sjoerg // whitespace. Error if the expected character isn't found.
parseChar(char c,ParseState State)156*e038c9c4Sjoerg static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
157*e038c9c4Sjoerg State.Input = consumeWhitespace(State.Input);
158*e038c9c4Sjoerg if (State.Input.empty() || State.Input.front() != c)
159*e038c9c4Sjoerg return makeParseError(State,
160*e038c9c4Sjoerg ("expected char not found: " + llvm::Twine(c)).str());
161*e038c9c4Sjoerg return makeParseProgress(advance(State, 1), llvm::None);
162*e038c9c4Sjoerg }
163*e038c9c4Sjoerg
164*e038c9c4Sjoerg // Parses an identitifer "token" -- handles preceding whitespace.
parseId(ParseState State)165*e038c9c4Sjoerg static ExpectedProgress<std::string> parseId(ParseState State) {
166*e038c9c4Sjoerg State.Input = consumeWhitespace(State.Input);
167*e038c9c4Sjoerg auto Id = State.Input.take_while(
168*e038c9c4Sjoerg [](char c) { return isASCII(c) && isIdentifierBody(c); });
169*e038c9c4Sjoerg if (Id.empty())
170*e038c9c4Sjoerg return makeParseError(State, "failed to parse name");
171*e038c9c4Sjoerg return makeParseProgress(advance(State, Id.size()), Id.str());
172*e038c9c4Sjoerg }
173*e038c9c4Sjoerg
174*e038c9c4Sjoerg // For consistency with the AST matcher parser and C++ code, node ids are
175*e038c9c4Sjoerg // written as strings. However, we do not support escaping in the string.
parseStringId(ParseState State)176*e038c9c4Sjoerg static ExpectedProgress<std::string> parseStringId(ParseState State) {
177*e038c9c4Sjoerg State.Input = consumeWhitespace(State.Input);
178*e038c9c4Sjoerg if (State.Input.empty())
179*e038c9c4Sjoerg return makeParseError(State, "unexpected end of input");
180*e038c9c4Sjoerg if (!State.Input.consume_front("\""))
181*e038c9c4Sjoerg return makeParseError(
182*e038c9c4Sjoerg State,
183*e038c9c4Sjoerg "expecting string, but encountered other character or end of input");
184*e038c9c4Sjoerg
185*e038c9c4Sjoerg StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
186*e038c9c4Sjoerg if (State.Input.size() == Id.size())
187*e038c9c4Sjoerg return makeParseError(State, "unterminated string");
188*e038c9c4Sjoerg // Advance past the trailing quote as well.
189*e038c9c4Sjoerg return makeParseProgress(advance(State, Id.size() + 1), Id.str());
190*e038c9c4Sjoerg }
191*e038c9c4Sjoerg
192*e038c9c4Sjoerg // Parses a single element surrounded by parens. `Op` is applied to the parsed
193*e038c9c4Sjoerg // result to create the result of this function call.
194*e038c9c4Sjoerg template <typename T>
parseSingle(ParseFunction<T> ParseElement,RangeSelectorOp<T> Op,ParseState State)195*e038c9c4Sjoerg ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
196*e038c9c4Sjoerg RangeSelectorOp<T> Op,
197*e038c9c4Sjoerg ParseState State) {
198*e038c9c4Sjoerg auto P = parseChar('(', State);
199*e038c9c4Sjoerg if (!P)
200*e038c9c4Sjoerg return P.takeError();
201*e038c9c4Sjoerg
202*e038c9c4Sjoerg auto E = ParseElement(P->State);
203*e038c9c4Sjoerg if (!E)
204*e038c9c4Sjoerg return E.takeError();
205*e038c9c4Sjoerg
206*e038c9c4Sjoerg P = parseChar(')', E->State);
207*e038c9c4Sjoerg if (!P)
208*e038c9c4Sjoerg return P.takeError();
209*e038c9c4Sjoerg
210*e038c9c4Sjoerg return makeParseProgress(P->State, Op(std::move(E->Value)));
211*e038c9c4Sjoerg }
212*e038c9c4Sjoerg
213*e038c9c4Sjoerg // Parses a pair of elements surrounded by parens and separated by comma. `Op`
214*e038c9c4Sjoerg // is applied to the parsed results to create the result of this function call.
215*e038c9c4Sjoerg template <typename T>
parsePair(ParseFunction<T> ParseElement,RangeSelectorOp<T,T> Op,ParseState State)216*e038c9c4Sjoerg ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
217*e038c9c4Sjoerg RangeSelectorOp<T, T> Op,
218*e038c9c4Sjoerg ParseState State) {
219*e038c9c4Sjoerg auto P = parseChar('(', State);
220*e038c9c4Sjoerg if (!P)
221*e038c9c4Sjoerg return P.takeError();
222*e038c9c4Sjoerg
223*e038c9c4Sjoerg auto Left = ParseElement(P->State);
224*e038c9c4Sjoerg if (!Left)
225*e038c9c4Sjoerg return Left.takeError();
226*e038c9c4Sjoerg
227*e038c9c4Sjoerg P = parseChar(',', Left->State);
228*e038c9c4Sjoerg if (!P)
229*e038c9c4Sjoerg return P.takeError();
230*e038c9c4Sjoerg
231*e038c9c4Sjoerg auto Right = ParseElement(P->State);
232*e038c9c4Sjoerg if (!Right)
233*e038c9c4Sjoerg return Right.takeError();
234*e038c9c4Sjoerg
235*e038c9c4Sjoerg P = parseChar(')', Right->State);
236*e038c9c4Sjoerg if (!P)
237*e038c9c4Sjoerg return P.takeError();
238*e038c9c4Sjoerg
239*e038c9c4Sjoerg return makeParseProgress(P->State,
240*e038c9c4Sjoerg Op(std::move(Left->Value), std::move(Right->Value)));
241*e038c9c4Sjoerg }
242*e038c9c4Sjoerg
243*e038c9c4Sjoerg // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
244*e038c9c4Sjoerg // Id operator). Returns StencilType representing the operator on success and
245*e038c9c4Sjoerg // error if it fails to parse input for an operator.
246*e038c9c4Sjoerg static ExpectedProgress<RangeSelector>
parseRangeSelectorImpl(ParseState State)247*e038c9c4Sjoerg parseRangeSelectorImpl(ParseState State) {
248*e038c9c4Sjoerg auto Id = parseId(State);
249*e038c9c4Sjoerg if (!Id)
250*e038c9c4Sjoerg return Id.takeError();
251*e038c9c4Sjoerg
252*e038c9c4Sjoerg std::string OpName = std::move(Id->Value);
253*e038c9c4Sjoerg if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
254*e038c9c4Sjoerg return parseSingle(parseStringId, *Op, Id->State);
255*e038c9c4Sjoerg
256*e038c9c4Sjoerg if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
257*e038c9c4Sjoerg return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
258*e038c9c4Sjoerg
259*e038c9c4Sjoerg if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
260*e038c9c4Sjoerg return parsePair(parseStringId, *Op, Id->State);
261*e038c9c4Sjoerg
262*e038c9c4Sjoerg if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
263*e038c9c4Sjoerg return parsePair(parseRangeSelectorImpl, *Op, Id->State);
264*e038c9c4Sjoerg
265*e038c9c4Sjoerg return makeParseError(State, "unknown selector name: " + OpName);
266*e038c9c4Sjoerg }
267*e038c9c4Sjoerg
parseRangeSelector(llvm::StringRef Input)268*e038c9c4Sjoerg Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
269*e038c9c4Sjoerg ParseState State = {Input, Input};
270*e038c9c4Sjoerg ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
271*e038c9c4Sjoerg if (!Result)
272*e038c9c4Sjoerg return Result.takeError();
273*e038c9c4Sjoerg State = Result->State;
274*e038c9c4Sjoerg // Discard any potentially trailing whitespace.
275*e038c9c4Sjoerg State.Input = consumeWhitespace(State.Input);
276*e038c9c4Sjoerg if (State.Input.empty())
277*e038c9c4Sjoerg return Result->Value;
278*e038c9c4Sjoerg return makeParseError(State, "unexpected input after selector");
279*e038c9c4Sjoerg }
280