xref: /llvm-project/clang/lib/Tooling/Transformer/Parsing.cpp (revision c25cc84b87935feefea5a93abc16efdbc9d91640)
1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/Tooling/Transformer/Parsing.h"
10 #include "clang/AST/Expr.h"
11 #include "clang/ASTMatchers/ASTMatchFinder.h"
12 #include "clang/Basic/CharInfo.h"
13 #include "clang/Basic/SourceLocation.h"
14 #include "clang/Lex/Lexer.h"
15 #include "clang/Tooling/Transformer/RangeSelector.h"
16 #include "clang/Tooling/Transformer/SourceCode.h"
17 #include "llvm/ADT/StringMap.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/Support/Errc.h"
20 #include "llvm/Support/Error.h"
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 using namespace clang;
26 using namespace transformer;
27 
28 // FIXME: This implementation is entirely separate from that of the AST
29 // matchers. Given the similarity of the languages and uses of the two parsers,
30 // the two should share a common parsing infrastructure, as should other
31 // Transformer types. We intend to unify this implementation soon to share as
32 // much as possible with the AST Matchers parsing.
33 
34 namespace {
35 using llvm::Expected;
36 
37 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
38 
39 struct ParseState {
40   // The remaining input to be processed.
41   StringRef Input;
42   // The original input. Not modified during parsing; only for reference in
43   // error reporting.
44   StringRef OriginalInput;
45 };
46 
47 // Represents an intermediate result returned by a parsing function. Functions
48 // that don't generate values should use `std::nullopt`
49 template <typename ResultType> struct ParseProgress {
50   ParseState State;
51   // Intermediate result generated by the Parser.
52   ResultType Value;
53 };
54 
55 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
56 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
57 
58 class ParseError : public llvm::ErrorInfo<ParseError> {
59 public:
60   // Required field for all ErrorInfo derivatives.
61   static char ID;
62 
63   ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
64       : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
65         Excerpt(std::move(InputExcerpt)) {}
66 
67   void log(llvm::raw_ostream &OS) const override {
68     OS << "parse error at position (" << Pos << "): " << ErrorMsg
69        << ": " + Excerpt;
70   }
71 
72   std::error_code convertToErrorCode() const override {
73     return llvm::inconvertibleErrorCode();
74   }
75 
76   // Position of the error in the input string.
77   size_t Pos;
78   std::string ErrorMsg;
79   // Excerpt of the input starting at the error position.
80   std::string Excerpt;
81 };
82 
83 char ParseError::ID;
84 } // namespace
85 
86 static const llvm::StringMap<RangeSelectorOp<std::string>> &
87 getUnaryStringSelectors() {
88   static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
89       {"name", name},
90       {"node", node},
91       {"statement", statement},
92       {"statements", statements},
93       {"member", member},
94       {"callArgs", callArgs},
95       {"elseBranch", elseBranch},
96       {"initListElements", initListElements}};
97   return M;
98 }
99 
100 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
101 getUnaryRangeSelectors() {
102   static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
103       {"before", before}, {"after", after}, {"expansion", expansion}};
104   return M;
105 }
106 
107 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
108 getBinaryStringSelectors() {
109   static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
110       {"encloseNodes", encloseNodes}};
111   return M;
112 }
113 
114 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
115 getBinaryRangeSelectors() {
116   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
117       M = {{"enclose", enclose}, {"between", between}};
118   return M;
119 }
120 
121 template <typename Element>
122 llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
123                                      llvm::StringRef Key) {
124   auto it = Map.find(Key);
125   if (it == Map.end())
126     return std::nullopt;
127   return it->second;
128 }
129 
130 template <typename ResultType>
131 ParseProgress<ResultType> makeParseProgress(ParseState State,
132                                             ResultType Result) {
133   return ParseProgress<ResultType>{State, std::move(Result)};
134 }
135 
136 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
137   size_t Pos = S.OriginalInput.size() - S.Input.size();
138   return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
139                                       S.OriginalInput.substr(Pos, 20).str());
140 }
141 
142 // Returns a new ParseState that advances \c S by \c N characters.
143 static ParseState advance(ParseState S, size_t N) {
144   S.Input = S.Input.drop_front(N);
145   return S;
146 }
147 
148 static StringRef consumeWhitespace(StringRef S) {
149   return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
150 }
151 
152 // Parses a single expected character \c c from \c State, skipping preceding
153 // whitespace.  Error if the expected character isn't found.
154 static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
155   State.Input = consumeWhitespace(State.Input);
156   if (State.Input.empty() || State.Input.front() != c)
157     return makeParseError(State,
158                           ("expected char not found: " + llvm::Twine(c)).str());
159   return makeParseProgress(advance(State, 1), std::nullopt);
160 }
161 
162 // Parses an identitifer "token" -- handles preceding whitespace.
163 static ExpectedProgress<std::string> parseId(ParseState State) {
164   State.Input = consumeWhitespace(State.Input);
165   auto Id = State.Input.take_while(
166       [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
167   if (Id.empty())
168     return makeParseError(State, "failed to parse name");
169   return makeParseProgress(advance(State, Id.size()), Id.str());
170 }
171 
172 // For consistency with the AST matcher parser and C++ code, node ids are
173 // written as strings. However, we do not support escaping in the string.
174 static ExpectedProgress<std::string> parseStringId(ParseState State) {
175   State.Input = consumeWhitespace(State.Input);
176   if (State.Input.empty())
177     return makeParseError(State, "unexpected end of input");
178   if (!State.Input.consume_front("\""))
179     return makeParseError(
180         State,
181         "expecting string, but encountered other character or end of input");
182 
183   StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
184   if (State.Input.size() == Id.size())
185     return makeParseError(State, "unterminated string");
186   // Advance past the trailing quote as well.
187   return makeParseProgress(advance(State, Id.size() + 1), Id.str());
188 }
189 
190 // Parses a single element surrounded by parens. `Op` is applied to the parsed
191 // result to create the result of this function call.
192 template <typename T>
193 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
194                                             RangeSelectorOp<T> Op,
195                                             ParseState State) {
196   auto P = parseChar('(', State);
197   if (!P)
198     return P.takeError();
199 
200   auto E = ParseElement(P->State);
201   if (!E)
202     return E.takeError();
203 
204   P = parseChar(')', E->State);
205   if (!P)
206     return P.takeError();
207 
208   return makeParseProgress(P->State, Op(std::move(E->Value)));
209 }
210 
211 // Parses a pair of elements surrounded by parens and separated by comma. `Op`
212 // is applied to the parsed results to create the result of this function call.
213 template <typename T>
214 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
215                                           RangeSelectorOp<T, T> Op,
216                                           ParseState State) {
217   auto P = parseChar('(', State);
218   if (!P)
219     return P.takeError();
220 
221   auto Left = ParseElement(P->State);
222   if (!Left)
223     return Left.takeError();
224 
225   P = parseChar(',', Left->State);
226   if (!P)
227     return P.takeError();
228 
229   auto Right = ParseElement(P->State);
230   if (!Right)
231     return Right.takeError();
232 
233   P = parseChar(')', Right->State);
234   if (!P)
235     return P.takeError();
236 
237   return makeParseProgress(P->State,
238                            Op(std::move(Left->Value), std::move(Right->Value)));
239 }
240 
241 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
242 // Id operator). Returns StencilType representing the operator on success and
243 // error if it fails to parse input for an operator.
244 static ExpectedProgress<RangeSelector>
245 parseRangeSelectorImpl(ParseState State) {
246   auto Id = parseId(State);
247   if (!Id)
248     return Id.takeError();
249 
250   std::string OpName = std::move(Id->Value);
251   if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
252     return parseSingle(parseStringId, *Op, Id->State);
253 
254   if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
255     return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
256 
257   if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
258     return parsePair(parseStringId, *Op, Id->State);
259 
260   if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
261     return parsePair(parseRangeSelectorImpl, *Op, Id->State);
262 
263   return makeParseError(State, "unknown selector name: " + OpName);
264 }
265 
266 Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
267   ParseState State = {Input, Input};
268   ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
269   if (!Result)
270     return Result.takeError();
271   State = Result->State;
272   // Discard any potentially trailing whitespace.
273   State.Input = consumeWhitespace(State.Input);
274   if (State.Input.empty())
275     return Result->Value;
276   return makeParseError(State, "unexpected input after selector");
277 }
278