xref: /openbsd-src/gnu/llvm/clang/lib/Tooling/Transformer/Parsing.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1ec727ea7Spatrick //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2ec727ea7Spatrick //
3ec727ea7Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ec727ea7Spatrick // See https://llvm.org/LICENSE.txt for license information.
5ec727ea7Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ec727ea7Spatrick //
7ec727ea7Spatrick //===----------------------------------------------------------------------===//
8ec727ea7Spatrick 
9ec727ea7Spatrick #include "clang/Tooling/Transformer/Parsing.h"
10ec727ea7Spatrick #include "clang/AST/Expr.h"
11ec727ea7Spatrick #include "clang/ASTMatchers/ASTMatchFinder.h"
12ec727ea7Spatrick #include "clang/Basic/CharInfo.h"
13ec727ea7Spatrick #include "clang/Basic/SourceLocation.h"
14ec727ea7Spatrick #include "clang/Lex/Lexer.h"
15ec727ea7Spatrick #include "clang/Tooling/Transformer/RangeSelector.h"
16ec727ea7Spatrick #include "clang/Tooling/Transformer/SourceCode.h"
17ec727ea7Spatrick #include "llvm/ADT/StringMap.h"
18ec727ea7Spatrick #include "llvm/ADT/StringRef.h"
19ec727ea7Spatrick #include "llvm/Support/Errc.h"
20ec727ea7Spatrick #include "llvm/Support/Error.h"
21*12c85518Srobert #include <optional>
22ec727ea7Spatrick #include <string>
23ec727ea7Spatrick #include <utility>
24ec727ea7Spatrick #include <vector>
25ec727ea7Spatrick 
26ec727ea7Spatrick using namespace clang;
27ec727ea7Spatrick using namespace transformer;
28ec727ea7Spatrick 
29ec727ea7Spatrick // FIXME: This implementation is entirely separate from that of the AST
30ec727ea7Spatrick // matchers. Given the similarity of the languages and uses of the two parsers,
31ec727ea7Spatrick // the two should share a common parsing infrastructure, as should other
32ec727ea7Spatrick // Transformer types. We intend to unify this implementation soon to share as
33ec727ea7Spatrick // much as possible with the AST Matchers parsing.
34ec727ea7Spatrick 
35ec727ea7Spatrick namespace {
36ec727ea7Spatrick using llvm::Expected;
37ec727ea7Spatrick 
38ec727ea7Spatrick template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
39ec727ea7Spatrick 
40ec727ea7Spatrick struct ParseState {
41ec727ea7Spatrick   // The remaining input to be processed.
42ec727ea7Spatrick   StringRef Input;
43ec727ea7Spatrick   // The original input. Not modified during parsing; only for reference in
44ec727ea7Spatrick   // error reporting.
45ec727ea7Spatrick   StringRef OriginalInput;
46ec727ea7Spatrick };
47ec727ea7Spatrick 
48ec727ea7Spatrick // Represents an intermediate result returned by a parsing function. Functions
49*12c85518Srobert // that don't generate values should use `std::nullopt`
50ec727ea7Spatrick template <typename ResultType> struct ParseProgress {
51ec727ea7Spatrick   ParseState State;
52ec727ea7Spatrick   // Intermediate result generated by the Parser.
53ec727ea7Spatrick   ResultType Value;
54ec727ea7Spatrick };
55ec727ea7Spatrick 
56ec727ea7Spatrick template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
57ec727ea7Spatrick template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
58ec727ea7Spatrick 
59ec727ea7Spatrick class ParseError : public llvm::ErrorInfo<ParseError> {
60ec727ea7Spatrick public:
61ec727ea7Spatrick   // Required field for all ErrorInfo derivatives.
62ec727ea7Spatrick   static char ID;
63ec727ea7Spatrick 
ParseError(size_t Pos,std::string ErrorMsg,std::string InputExcerpt)64ec727ea7Spatrick   ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
65ec727ea7Spatrick       : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
66ec727ea7Spatrick         Excerpt(std::move(InputExcerpt)) {}
67ec727ea7Spatrick 
log(llvm::raw_ostream & OS) const68ec727ea7Spatrick   void log(llvm::raw_ostream &OS) const override {
69ec727ea7Spatrick     OS << "parse error at position (" << Pos << "): " << ErrorMsg
70ec727ea7Spatrick        << ": " + Excerpt;
71ec727ea7Spatrick   }
72ec727ea7Spatrick 
convertToErrorCode() const73ec727ea7Spatrick   std::error_code convertToErrorCode() const override {
74ec727ea7Spatrick     return llvm::inconvertibleErrorCode();
75ec727ea7Spatrick   }
76ec727ea7Spatrick 
77ec727ea7Spatrick   // Position of the error in the input string.
78ec727ea7Spatrick   size_t Pos;
79ec727ea7Spatrick   std::string ErrorMsg;
80ec727ea7Spatrick   // Excerpt of the input starting at the error position.
81ec727ea7Spatrick   std::string Excerpt;
82ec727ea7Spatrick };
83ec727ea7Spatrick 
84ec727ea7Spatrick char ParseError::ID;
85ec727ea7Spatrick } // namespace
86ec727ea7Spatrick 
87ec727ea7Spatrick static const llvm::StringMap<RangeSelectorOp<std::string>> &
getUnaryStringSelectors()88ec727ea7Spatrick getUnaryStringSelectors() {
89ec727ea7Spatrick   static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
90ec727ea7Spatrick       {"name", name},
91ec727ea7Spatrick       {"node", node},
92ec727ea7Spatrick       {"statement", statement},
93ec727ea7Spatrick       {"statements", statements},
94ec727ea7Spatrick       {"member", member},
95ec727ea7Spatrick       {"callArgs", callArgs},
96ec727ea7Spatrick       {"elseBranch", elseBranch},
97ec727ea7Spatrick       {"initListElements", initListElements}};
98ec727ea7Spatrick   return M;
99ec727ea7Spatrick }
100ec727ea7Spatrick 
101ec727ea7Spatrick static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
getUnaryRangeSelectors()102ec727ea7Spatrick getUnaryRangeSelectors() {
103ec727ea7Spatrick   static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
104ec727ea7Spatrick       {"before", before}, {"after", after}, {"expansion", expansion}};
105ec727ea7Spatrick   return M;
106ec727ea7Spatrick }
107ec727ea7Spatrick 
108ec727ea7Spatrick static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
getBinaryStringSelectors()109ec727ea7Spatrick getBinaryStringSelectors() {
110ec727ea7Spatrick   static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
111a9ac8606Spatrick       {"encloseNodes", encloseNodes}};
112ec727ea7Spatrick   return M;
113ec727ea7Spatrick }
114ec727ea7Spatrick 
115ec727ea7Spatrick static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors()116ec727ea7Spatrick getBinaryRangeSelectors() {
117ec727ea7Spatrick   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
118a9ac8606Spatrick       M = {{"enclose", enclose}, {"between", between}};
119ec727ea7Spatrick   return M;
120ec727ea7Spatrick }
121ec727ea7Spatrick 
122ec727ea7Spatrick template <typename Element>
findOptional(const llvm::StringMap<Element> & Map,llvm::StringRef Key)123*12c85518Srobert std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
124ec727ea7Spatrick                                     llvm::StringRef Key) {
125ec727ea7Spatrick   auto it = Map.find(Key);
126ec727ea7Spatrick   if (it == Map.end())
127*12c85518Srobert     return std::nullopt;
128ec727ea7Spatrick   return it->second;
129ec727ea7Spatrick }
130ec727ea7Spatrick 
131ec727ea7Spatrick template <typename ResultType>
makeParseProgress(ParseState State,ResultType Result)132ec727ea7Spatrick ParseProgress<ResultType> makeParseProgress(ParseState State,
133ec727ea7Spatrick                                             ResultType Result) {
134ec727ea7Spatrick   return ParseProgress<ResultType>{State, std::move(Result)};
135ec727ea7Spatrick }
136ec727ea7Spatrick 
makeParseError(const ParseState & S,std::string ErrorMsg)137ec727ea7Spatrick static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
138ec727ea7Spatrick   size_t Pos = S.OriginalInput.size() - S.Input.size();
139ec727ea7Spatrick   return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
140ec727ea7Spatrick                                       S.OriginalInput.substr(Pos, 20).str());
141ec727ea7Spatrick }
142ec727ea7Spatrick 
143ec727ea7Spatrick // Returns a new ParseState that advances \c S by \c N characters.
advance(ParseState S,size_t N)144ec727ea7Spatrick static ParseState advance(ParseState S, size_t N) {
145ec727ea7Spatrick   S.Input = S.Input.drop_front(N);
146ec727ea7Spatrick   return S;
147ec727ea7Spatrick }
148ec727ea7Spatrick 
consumeWhitespace(StringRef S)149ec727ea7Spatrick static StringRef consumeWhitespace(StringRef S) {
150a9ac8606Spatrick   return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
151ec727ea7Spatrick }
152ec727ea7Spatrick 
153ec727ea7Spatrick // Parses a single expected character \c c from \c State, skipping preceding
154ec727ea7Spatrick // whitespace.  Error if the expected character isn't found.
parseChar(char c,ParseState State)155*12c85518Srobert static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
156ec727ea7Spatrick   State.Input = consumeWhitespace(State.Input);
157ec727ea7Spatrick   if (State.Input.empty() || State.Input.front() != c)
158ec727ea7Spatrick     return makeParseError(State,
159ec727ea7Spatrick                           ("expected char not found: " + llvm::Twine(c)).str());
160*12c85518Srobert   return makeParseProgress(advance(State, 1), std::nullopt);
161ec727ea7Spatrick }
162ec727ea7Spatrick 
163ec727ea7Spatrick // Parses an identitifer "token" -- handles preceding whitespace.
parseId(ParseState State)164ec727ea7Spatrick static ExpectedProgress<std::string> parseId(ParseState State) {
165ec727ea7Spatrick   State.Input = consumeWhitespace(State.Input);
166ec727ea7Spatrick   auto Id = State.Input.take_while(
167*12c85518Srobert       [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
168ec727ea7Spatrick   if (Id.empty())
169ec727ea7Spatrick     return makeParseError(State, "failed to parse name");
170ec727ea7Spatrick   return makeParseProgress(advance(State, Id.size()), Id.str());
171ec727ea7Spatrick }
172ec727ea7Spatrick 
173ec727ea7Spatrick // For consistency with the AST matcher parser and C++ code, node ids are
174ec727ea7Spatrick // written as strings. However, we do not support escaping in the string.
parseStringId(ParseState State)175ec727ea7Spatrick static ExpectedProgress<std::string> parseStringId(ParseState State) {
176ec727ea7Spatrick   State.Input = consumeWhitespace(State.Input);
177ec727ea7Spatrick   if (State.Input.empty())
178ec727ea7Spatrick     return makeParseError(State, "unexpected end of input");
179ec727ea7Spatrick   if (!State.Input.consume_front("\""))
180ec727ea7Spatrick     return makeParseError(
181ec727ea7Spatrick         State,
182ec727ea7Spatrick         "expecting string, but encountered other character or end of input");
183ec727ea7Spatrick 
184ec727ea7Spatrick   StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
185ec727ea7Spatrick   if (State.Input.size() == Id.size())
186ec727ea7Spatrick     return makeParseError(State, "unterminated string");
187ec727ea7Spatrick   // Advance past the trailing quote as well.
188ec727ea7Spatrick   return makeParseProgress(advance(State, Id.size() + 1), Id.str());
189ec727ea7Spatrick }
190ec727ea7Spatrick 
191ec727ea7Spatrick // Parses a single element surrounded by parens. `Op` is applied to the parsed
192ec727ea7Spatrick // result to create the result of this function call.
193ec727ea7Spatrick template <typename T>
parseSingle(ParseFunction<T> ParseElement,RangeSelectorOp<T> Op,ParseState State)194ec727ea7Spatrick ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
195ec727ea7Spatrick                                             RangeSelectorOp<T> Op,
196ec727ea7Spatrick                                             ParseState State) {
197ec727ea7Spatrick   auto P = parseChar('(', State);
198ec727ea7Spatrick   if (!P)
199ec727ea7Spatrick     return P.takeError();
200ec727ea7Spatrick 
201ec727ea7Spatrick   auto E = ParseElement(P->State);
202ec727ea7Spatrick   if (!E)
203ec727ea7Spatrick     return E.takeError();
204ec727ea7Spatrick 
205ec727ea7Spatrick   P = parseChar(')', E->State);
206ec727ea7Spatrick   if (!P)
207ec727ea7Spatrick     return P.takeError();
208ec727ea7Spatrick 
209ec727ea7Spatrick   return makeParseProgress(P->State, Op(std::move(E->Value)));
210ec727ea7Spatrick }
211ec727ea7Spatrick 
212ec727ea7Spatrick // Parses a pair of elements surrounded by parens and separated by comma. `Op`
213ec727ea7Spatrick // is applied to the parsed results to create the result of this function call.
214ec727ea7Spatrick template <typename T>
parsePair(ParseFunction<T> ParseElement,RangeSelectorOp<T,T> Op,ParseState State)215ec727ea7Spatrick ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
216ec727ea7Spatrick                                           RangeSelectorOp<T, T> Op,
217ec727ea7Spatrick                                           ParseState State) {
218ec727ea7Spatrick   auto P = parseChar('(', State);
219ec727ea7Spatrick   if (!P)
220ec727ea7Spatrick     return P.takeError();
221ec727ea7Spatrick 
222ec727ea7Spatrick   auto Left = ParseElement(P->State);
223ec727ea7Spatrick   if (!Left)
224ec727ea7Spatrick     return Left.takeError();
225ec727ea7Spatrick 
226ec727ea7Spatrick   P = parseChar(',', Left->State);
227ec727ea7Spatrick   if (!P)
228ec727ea7Spatrick     return P.takeError();
229ec727ea7Spatrick 
230ec727ea7Spatrick   auto Right = ParseElement(P->State);
231ec727ea7Spatrick   if (!Right)
232ec727ea7Spatrick     return Right.takeError();
233ec727ea7Spatrick 
234ec727ea7Spatrick   P = parseChar(')', Right->State);
235ec727ea7Spatrick   if (!P)
236ec727ea7Spatrick     return P.takeError();
237ec727ea7Spatrick 
238ec727ea7Spatrick   return makeParseProgress(P->State,
239ec727ea7Spatrick                            Op(std::move(Left->Value), std::move(Right->Value)));
240ec727ea7Spatrick }
241ec727ea7Spatrick 
242ec727ea7Spatrick // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
243ec727ea7Spatrick // Id operator). Returns StencilType representing the operator on success and
244ec727ea7Spatrick // error if it fails to parse input for an operator.
245ec727ea7Spatrick static ExpectedProgress<RangeSelector>
parseRangeSelectorImpl(ParseState State)246ec727ea7Spatrick parseRangeSelectorImpl(ParseState State) {
247ec727ea7Spatrick   auto Id = parseId(State);
248ec727ea7Spatrick   if (!Id)
249ec727ea7Spatrick     return Id.takeError();
250ec727ea7Spatrick 
251ec727ea7Spatrick   std::string OpName = std::move(Id->Value);
252ec727ea7Spatrick   if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
253ec727ea7Spatrick     return parseSingle(parseStringId, *Op, Id->State);
254ec727ea7Spatrick 
255ec727ea7Spatrick   if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
256ec727ea7Spatrick     return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
257ec727ea7Spatrick 
258ec727ea7Spatrick   if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
259ec727ea7Spatrick     return parsePair(parseStringId, *Op, Id->State);
260ec727ea7Spatrick 
261ec727ea7Spatrick   if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
262ec727ea7Spatrick     return parsePair(parseRangeSelectorImpl, *Op, Id->State);
263ec727ea7Spatrick 
264ec727ea7Spatrick   return makeParseError(State, "unknown selector name: " + OpName);
265ec727ea7Spatrick }
266ec727ea7Spatrick 
parseRangeSelector(llvm::StringRef Input)267ec727ea7Spatrick Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
268ec727ea7Spatrick   ParseState State = {Input, Input};
269ec727ea7Spatrick   ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
270ec727ea7Spatrick   if (!Result)
271ec727ea7Spatrick     return Result.takeError();
272ec727ea7Spatrick   State = Result->State;
273ec727ea7Spatrick   // Discard any potentially trailing whitespace.
274ec727ea7Spatrick   State.Input = consumeWhitespace(State.Input);
275ec727ea7Spatrick   if (State.Input.empty())
276ec727ea7Spatrick     return Result->Value;
277ec727ea7Spatrick   return makeParseError(State, "unexpected input after selector");
278ec727ea7Spatrick }
279