xref: /freebsd-src/contrib/llvm-project/clang/lib/Tooling/Transformer/Parsing.cpp (revision 5ffd83dbcc34f10e07f6d3e968ae6365869615f4)
1*5ffd83dbSDimitry Andric //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2*5ffd83dbSDimitry Andric //
3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*5ffd83dbSDimitry Andric //
7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
8*5ffd83dbSDimitry Andric 
9*5ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/Parsing.h"
10*5ffd83dbSDimitry Andric #include "clang/AST/Expr.h"
11*5ffd83dbSDimitry Andric #include "clang/ASTMatchers/ASTMatchFinder.h"
12*5ffd83dbSDimitry Andric #include "clang/Basic/CharInfo.h"
13*5ffd83dbSDimitry Andric #include "clang/Basic/SourceLocation.h"
14*5ffd83dbSDimitry Andric #include "clang/Lex/Lexer.h"
15*5ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/RangeSelector.h"
16*5ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/SourceCode.h"
17*5ffd83dbSDimitry Andric #include "llvm/ADT/None.h"
18*5ffd83dbSDimitry Andric #include "llvm/ADT/StringMap.h"
19*5ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h"
20*5ffd83dbSDimitry Andric #include "llvm/Support/Errc.h"
21*5ffd83dbSDimitry Andric #include "llvm/Support/Error.h"
22*5ffd83dbSDimitry Andric #include <string>
23*5ffd83dbSDimitry Andric #include <utility>
24*5ffd83dbSDimitry Andric #include <vector>
25*5ffd83dbSDimitry Andric 
26*5ffd83dbSDimitry Andric using namespace clang;
27*5ffd83dbSDimitry Andric using namespace transformer;
28*5ffd83dbSDimitry Andric 
29*5ffd83dbSDimitry Andric // FIXME: This implementation is entirely separate from that of the AST
30*5ffd83dbSDimitry Andric // matchers. Given the similarity of the languages and uses of the two parsers,
31*5ffd83dbSDimitry Andric // the two should share a common parsing infrastructure, as should other
32*5ffd83dbSDimitry Andric // Transformer types. We intend to unify this implementation soon to share as
33*5ffd83dbSDimitry Andric // much as possible with the AST Matchers parsing.
34*5ffd83dbSDimitry Andric 
35*5ffd83dbSDimitry Andric namespace {
36*5ffd83dbSDimitry Andric using llvm::Error;
37*5ffd83dbSDimitry Andric using llvm::Expected;
38*5ffd83dbSDimitry Andric 
39*5ffd83dbSDimitry Andric template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
40*5ffd83dbSDimitry Andric 
41*5ffd83dbSDimitry Andric struct ParseState {
42*5ffd83dbSDimitry Andric   // The remaining input to be processed.
43*5ffd83dbSDimitry Andric   StringRef Input;
44*5ffd83dbSDimitry Andric   // The original input. Not modified during parsing; only for reference in
45*5ffd83dbSDimitry Andric   // error reporting.
46*5ffd83dbSDimitry Andric   StringRef OriginalInput;
47*5ffd83dbSDimitry Andric };
48*5ffd83dbSDimitry Andric 
49*5ffd83dbSDimitry Andric // Represents an intermediate result returned by a parsing function. Functions
50*5ffd83dbSDimitry Andric // that don't generate values should use `llvm::None`
51*5ffd83dbSDimitry Andric template <typename ResultType> struct ParseProgress {
52*5ffd83dbSDimitry Andric   ParseState State;
53*5ffd83dbSDimitry Andric   // Intermediate result generated by the Parser.
54*5ffd83dbSDimitry Andric   ResultType Value;
55*5ffd83dbSDimitry Andric };
56*5ffd83dbSDimitry Andric 
57*5ffd83dbSDimitry Andric template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
58*5ffd83dbSDimitry Andric template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
59*5ffd83dbSDimitry Andric 
60*5ffd83dbSDimitry Andric class ParseError : public llvm::ErrorInfo<ParseError> {
61*5ffd83dbSDimitry Andric public:
62*5ffd83dbSDimitry Andric   // Required field for all ErrorInfo derivatives.
63*5ffd83dbSDimitry Andric   static char ID;
64*5ffd83dbSDimitry Andric 
65*5ffd83dbSDimitry Andric   ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
66*5ffd83dbSDimitry Andric       : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
67*5ffd83dbSDimitry Andric         Excerpt(std::move(InputExcerpt)) {}
68*5ffd83dbSDimitry Andric 
69*5ffd83dbSDimitry Andric   void log(llvm::raw_ostream &OS) const override {
70*5ffd83dbSDimitry Andric     OS << "parse error at position (" << Pos << "): " << ErrorMsg
71*5ffd83dbSDimitry Andric        << ": " + Excerpt;
72*5ffd83dbSDimitry Andric   }
73*5ffd83dbSDimitry Andric 
74*5ffd83dbSDimitry Andric   std::error_code convertToErrorCode() const override {
75*5ffd83dbSDimitry Andric     return llvm::inconvertibleErrorCode();
76*5ffd83dbSDimitry Andric   }
77*5ffd83dbSDimitry Andric 
78*5ffd83dbSDimitry Andric   // Position of the error in the input string.
79*5ffd83dbSDimitry Andric   size_t Pos;
80*5ffd83dbSDimitry Andric   std::string ErrorMsg;
81*5ffd83dbSDimitry Andric   // Excerpt of the input starting at the error position.
82*5ffd83dbSDimitry Andric   std::string Excerpt;
83*5ffd83dbSDimitry Andric };
84*5ffd83dbSDimitry Andric 
85*5ffd83dbSDimitry Andric char ParseError::ID;
86*5ffd83dbSDimitry Andric } // namespace
87*5ffd83dbSDimitry Andric 
88*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string>> &
89*5ffd83dbSDimitry Andric getUnaryStringSelectors() {
90*5ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
91*5ffd83dbSDimitry Andric       {"name", name},
92*5ffd83dbSDimitry Andric       {"node", node},
93*5ffd83dbSDimitry Andric       {"statement", statement},
94*5ffd83dbSDimitry Andric       {"statements", statements},
95*5ffd83dbSDimitry Andric       {"member", member},
96*5ffd83dbSDimitry Andric       {"callArgs", callArgs},
97*5ffd83dbSDimitry Andric       {"elseBranch", elseBranch},
98*5ffd83dbSDimitry Andric       {"initListElements", initListElements}};
99*5ffd83dbSDimitry Andric   return M;
100*5ffd83dbSDimitry Andric }
101*5ffd83dbSDimitry Andric 
102*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
103*5ffd83dbSDimitry Andric getUnaryRangeSelectors() {
104*5ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
105*5ffd83dbSDimitry Andric       {"before", before}, {"after", after}, {"expansion", expansion}};
106*5ffd83dbSDimitry Andric   return M;
107*5ffd83dbSDimitry Andric }
108*5ffd83dbSDimitry Andric 
109*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
110*5ffd83dbSDimitry Andric getBinaryStringSelectors() {
111*5ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
112*5ffd83dbSDimitry Andric       {"encloseNodes", range}};
113*5ffd83dbSDimitry Andric   return M;
114*5ffd83dbSDimitry Andric }
115*5ffd83dbSDimitry Andric 
116*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
117*5ffd83dbSDimitry Andric getBinaryRangeSelectors() {
118*5ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
119*5ffd83dbSDimitry Andric       M = {{"enclose", range}};
120*5ffd83dbSDimitry Andric   return M;
121*5ffd83dbSDimitry Andric }
122*5ffd83dbSDimitry Andric 
123*5ffd83dbSDimitry Andric template <typename Element>
124*5ffd83dbSDimitry Andric llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
125*5ffd83dbSDimitry Andric                                      llvm::StringRef Key) {
126*5ffd83dbSDimitry Andric   auto it = Map.find(Key);
127*5ffd83dbSDimitry Andric   if (it == Map.end())
128*5ffd83dbSDimitry Andric     return llvm::None;
129*5ffd83dbSDimitry Andric   return it->second;
130*5ffd83dbSDimitry Andric }
131*5ffd83dbSDimitry Andric 
132*5ffd83dbSDimitry Andric template <typename ResultType>
133*5ffd83dbSDimitry Andric ParseProgress<ResultType> makeParseProgress(ParseState State,
134*5ffd83dbSDimitry Andric                                             ResultType Result) {
135*5ffd83dbSDimitry Andric   return ParseProgress<ResultType>{State, std::move(Result)};
136*5ffd83dbSDimitry Andric }
137*5ffd83dbSDimitry Andric 
138*5ffd83dbSDimitry Andric static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
139*5ffd83dbSDimitry Andric   size_t Pos = S.OriginalInput.size() - S.Input.size();
140*5ffd83dbSDimitry Andric   return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
141*5ffd83dbSDimitry Andric                                       S.OriginalInput.substr(Pos, 20).str());
142*5ffd83dbSDimitry Andric }
143*5ffd83dbSDimitry Andric 
144*5ffd83dbSDimitry Andric // Returns a new ParseState that advances \c S by \c N characters.
145*5ffd83dbSDimitry Andric static ParseState advance(ParseState S, size_t N) {
146*5ffd83dbSDimitry Andric   S.Input = S.Input.drop_front(N);
147*5ffd83dbSDimitry Andric   return S;
148*5ffd83dbSDimitry Andric }
149*5ffd83dbSDimitry Andric 
150*5ffd83dbSDimitry Andric static StringRef consumeWhitespace(StringRef S) {
151*5ffd83dbSDimitry Andric   return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); });
152*5ffd83dbSDimitry Andric }
153*5ffd83dbSDimitry Andric 
154*5ffd83dbSDimitry Andric // Parses a single expected character \c c from \c State, skipping preceding
155*5ffd83dbSDimitry Andric // whitespace.  Error if the expected character isn't found.
156*5ffd83dbSDimitry Andric static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
157*5ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
158*5ffd83dbSDimitry Andric   if (State.Input.empty() || State.Input.front() != c)
159*5ffd83dbSDimitry Andric     return makeParseError(State,
160*5ffd83dbSDimitry Andric                           ("expected char not found: " + llvm::Twine(c)).str());
161*5ffd83dbSDimitry Andric   return makeParseProgress(advance(State, 1), llvm::None);
162*5ffd83dbSDimitry Andric }
163*5ffd83dbSDimitry Andric 
164*5ffd83dbSDimitry Andric // Parses an identitifer "token" -- handles preceding whitespace.
165*5ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseId(ParseState State) {
166*5ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
167*5ffd83dbSDimitry Andric   auto Id = State.Input.take_while(
168*5ffd83dbSDimitry Andric       [](char c) { return c >= 0 && isIdentifierBody(c); });
169*5ffd83dbSDimitry Andric   if (Id.empty())
170*5ffd83dbSDimitry Andric     return makeParseError(State, "failed to parse name");
171*5ffd83dbSDimitry Andric   return makeParseProgress(advance(State, Id.size()), Id.str());
172*5ffd83dbSDimitry Andric }
173*5ffd83dbSDimitry Andric 
174*5ffd83dbSDimitry Andric // For consistency with the AST matcher parser and C++ code, node ids are
175*5ffd83dbSDimitry Andric // written as strings. However, we do not support escaping in the string.
176*5ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseStringId(ParseState State) {
177*5ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
178*5ffd83dbSDimitry Andric   if (State.Input.empty())
179*5ffd83dbSDimitry Andric     return makeParseError(State, "unexpected end of input");
180*5ffd83dbSDimitry Andric   if (!State.Input.consume_front("\""))
181*5ffd83dbSDimitry Andric     return makeParseError(
182*5ffd83dbSDimitry Andric         State,
183*5ffd83dbSDimitry Andric         "expecting string, but encountered other character or end of input");
184*5ffd83dbSDimitry Andric 
185*5ffd83dbSDimitry Andric   StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
186*5ffd83dbSDimitry Andric   if (State.Input.size() == Id.size())
187*5ffd83dbSDimitry Andric     return makeParseError(State, "unterminated string");
188*5ffd83dbSDimitry Andric   // Advance past the trailing quote as well.
189*5ffd83dbSDimitry Andric   return makeParseProgress(advance(State, Id.size() + 1), Id.str());
190*5ffd83dbSDimitry Andric }
191*5ffd83dbSDimitry Andric 
192*5ffd83dbSDimitry Andric // Parses a single element surrounded by parens. `Op` is applied to the parsed
193*5ffd83dbSDimitry Andric // result to create the result of this function call.
194*5ffd83dbSDimitry Andric template <typename T>
195*5ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
196*5ffd83dbSDimitry Andric                                             RangeSelectorOp<T> Op,
197*5ffd83dbSDimitry Andric                                             ParseState State) {
198*5ffd83dbSDimitry Andric   auto P = parseChar('(', State);
199*5ffd83dbSDimitry Andric   if (!P)
200*5ffd83dbSDimitry Andric     return P.takeError();
201*5ffd83dbSDimitry Andric 
202*5ffd83dbSDimitry Andric   auto E = ParseElement(P->State);
203*5ffd83dbSDimitry Andric   if (!E)
204*5ffd83dbSDimitry Andric     return E.takeError();
205*5ffd83dbSDimitry Andric 
206*5ffd83dbSDimitry Andric   P = parseChar(')', E->State);
207*5ffd83dbSDimitry Andric   if (!P)
208*5ffd83dbSDimitry Andric     return P.takeError();
209*5ffd83dbSDimitry Andric 
210*5ffd83dbSDimitry Andric   return makeParseProgress(P->State, Op(std::move(E->Value)));
211*5ffd83dbSDimitry Andric }
212*5ffd83dbSDimitry Andric 
213*5ffd83dbSDimitry Andric // Parses a pair of elements surrounded by parens and separated by comma. `Op`
214*5ffd83dbSDimitry Andric // is applied to the parsed results to create the result of this function call.
215*5ffd83dbSDimitry Andric template <typename T>
216*5ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
217*5ffd83dbSDimitry Andric                                           RangeSelectorOp<T, T> Op,
218*5ffd83dbSDimitry Andric                                           ParseState State) {
219*5ffd83dbSDimitry Andric   auto P = parseChar('(', State);
220*5ffd83dbSDimitry Andric   if (!P)
221*5ffd83dbSDimitry Andric     return P.takeError();
222*5ffd83dbSDimitry Andric 
223*5ffd83dbSDimitry Andric   auto Left = ParseElement(P->State);
224*5ffd83dbSDimitry Andric   if (!Left)
225*5ffd83dbSDimitry Andric     return Left.takeError();
226*5ffd83dbSDimitry Andric 
227*5ffd83dbSDimitry Andric   P = parseChar(',', Left->State);
228*5ffd83dbSDimitry Andric   if (!P)
229*5ffd83dbSDimitry Andric     return P.takeError();
230*5ffd83dbSDimitry Andric 
231*5ffd83dbSDimitry Andric   auto Right = ParseElement(P->State);
232*5ffd83dbSDimitry Andric   if (!Right)
233*5ffd83dbSDimitry Andric     return Right.takeError();
234*5ffd83dbSDimitry Andric 
235*5ffd83dbSDimitry Andric   P = parseChar(')', Right->State);
236*5ffd83dbSDimitry Andric   if (!P)
237*5ffd83dbSDimitry Andric     return P.takeError();
238*5ffd83dbSDimitry Andric 
239*5ffd83dbSDimitry Andric   return makeParseProgress(P->State,
240*5ffd83dbSDimitry Andric                            Op(std::move(Left->Value), std::move(Right->Value)));
241*5ffd83dbSDimitry Andric }
242*5ffd83dbSDimitry Andric 
243*5ffd83dbSDimitry Andric // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
244*5ffd83dbSDimitry Andric // Id operator). Returns StencilType representing the operator on success and
245*5ffd83dbSDimitry Andric // error if it fails to parse input for an operator.
246*5ffd83dbSDimitry Andric static ExpectedProgress<RangeSelector>
247*5ffd83dbSDimitry Andric parseRangeSelectorImpl(ParseState State) {
248*5ffd83dbSDimitry Andric   auto Id = parseId(State);
249*5ffd83dbSDimitry Andric   if (!Id)
250*5ffd83dbSDimitry Andric     return Id.takeError();
251*5ffd83dbSDimitry Andric 
252*5ffd83dbSDimitry Andric   std::string OpName = std::move(Id->Value);
253*5ffd83dbSDimitry Andric   if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
254*5ffd83dbSDimitry Andric     return parseSingle(parseStringId, *Op, Id->State);
255*5ffd83dbSDimitry Andric 
256*5ffd83dbSDimitry Andric   if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
257*5ffd83dbSDimitry Andric     return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
258*5ffd83dbSDimitry Andric 
259*5ffd83dbSDimitry Andric   if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
260*5ffd83dbSDimitry Andric     return parsePair(parseStringId, *Op, Id->State);
261*5ffd83dbSDimitry Andric 
262*5ffd83dbSDimitry Andric   if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
263*5ffd83dbSDimitry Andric     return parsePair(parseRangeSelectorImpl, *Op, Id->State);
264*5ffd83dbSDimitry Andric 
265*5ffd83dbSDimitry Andric   return makeParseError(State, "unknown selector name: " + OpName);
266*5ffd83dbSDimitry Andric }
267*5ffd83dbSDimitry Andric 
268*5ffd83dbSDimitry Andric Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
269*5ffd83dbSDimitry Andric   ParseState State = {Input, Input};
270*5ffd83dbSDimitry Andric   ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
271*5ffd83dbSDimitry Andric   if (!Result)
272*5ffd83dbSDimitry Andric     return Result.takeError();
273*5ffd83dbSDimitry Andric   State = Result->State;
274*5ffd83dbSDimitry Andric   // Discard any potentially trailing whitespace.
275*5ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
276*5ffd83dbSDimitry Andric   if (State.Input.empty())
277*5ffd83dbSDimitry Andric     return Result->Value;
278*5ffd83dbSDimitry Andric   return makeParseError(State, "unexpected input after selector");
279*5ffd83dbSDimitry Andric }
280