xref: /llvm-project/clang/lib/Tooling/Transformer/Parsing.cpp (revision 6ad0788c332bb2043142954d300c49ac3e537f34)
19ca50e88SYitzhak Mandelbaum //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
29ca50e88SYitzhak Mandelbaum //
39ca50e88SYitzhak Mandelbaum // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49ca50e88SYitzhak Mandelbaum // See https://llvm.org/LICENSE.txt for license information.
59ca50e88SYitzhak Mandelbaum // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69ca50e88SYitzhak Mandelbaum //
79ca50e88SYitzhak Mandelbaum //===----------------------------------------------------------------------===//
89ca50e88SYitzhak Mandelbaum 
99ca50e88SYitzhak Mandelbaum #include "clang/Tooling/Transformer/Parsing.h"
109ca50e88SYitzhak Mandelbaum #include "clang/AST/Expr.h"
119ca50e88SYitzhak Mandelbaum #include "clang/ASTMatchers/ASTMatchFinder.h"
129ca50e88SYitzhak Mandelbaum #include "clang/Basic/CharInfo.h"
139ca50e88SYitzhak Mandelbaum #include "clang/Basic/SourceLocation.h"
149ca50e88SYitzhak Mandelbaum #include "clang/Lex/Lexer.h"
159ca50e88SYitzhak Mandelbaum #include "clang/Tooling/Transformer/RangeSelector.h"
169ca50e88SYitzhak Mandelbaum #include "clang/Tooling/Transformer/SourceCode.h"
179ca50e88SYitzhak Mandelbaum #include "llvm/ADT/StringMap.h"
189ca50e88SYitzhak Mandelbaum #include "llvm/ADT/StringRef.h"
199ca50e88SYitzhak Mandelbaum #include "llvm/Support/Errc.h"
209ca50e88SYitzhak Mandelbaum #include "llvm/Support/Error.h"
21a1580d7bSKazu Hirata #include <optional>
229ca50e88SYitzhak Mandelbaum #include <string>
239ca50e88SYitzhak Mandelbaum #include <utility>
249ca50e88SYitzhak Mandelbaum #include <vector>
259ca50e88SYitzhak Mandelbaum 
269ca50e88SYitzhak Mandelbaum using namespace clang;
279ca50e88SYitzhak Mandelbaum using namespace transformer;
289ca50e88SYitzhak Mandelbaum 
299ca50e88SYitzhak Mandelbaum // FIXME: This implementation is entirely separate from that of the AST
309ca50e88SYitzhak Mandelbaum // matchers. Given the similarity of the languages and uses of the two parsers,
319ca50e88SYitzhak Mandelbaum // the two should share a common parsing infrastructure, as should other
329ca50e88SYitzhak Mandelbaum // Transformer types. We intend to unify this implementation soon to share as
339ca50e88SYitzhak Mandelbaum // much as possible with the AST Matchers parsing.
349ca50e88SYitzhak Mandelbaum 
359ca50e88SYitzhak Mandelbaum namespace {
369ca50e88SYitzhak Mandelbaum using llvm::Expected;
379ca50e88SYitzhak Mandelbaum 
389ca50e88SYitzhak Mandelbaum template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
399ca50e88SYitzhak Mandelbaum 
409ca50e88SYitzhak Mandelbaum struct ParseState {
419ca50e88SYitzhak Mandelbaum   // The remaining input to be processed.
429ca50e88SYitzhak Mandelbaum   StringRef Input;
439ca50e88SYitzhak Mandelbaum   // The original input. Not modified during parsing; only for reference in
449ca50e88SYitzhak Mandelbaum   // error reporting.
459ca50e88SYitzhak Mandelbaum   StringRef OriginalInput;
469ca50e88SYitzhak Mandelbaum };
479ca50e88SYitzhak Mandelbaum 
489ca50e88SYitzhak Mandelbaum // Represents an intermediate result returned by a parsing function. Functions
4922731dbdSKazu Hirata // that don't generate values should use `std::nullopt`
509ca50e88SYitzhak Mandelbaum template <typename ResultType> struct ParseProgress {
519ca50e88SYitzhak Mandelbaum   ParseState State;
529ca50e88SYitzhak Mandelbaum   // Intermediate result generated by the Parser.
539ca50e88SYitzhak Mandelbaum   ResultType Value;
549ca50e88SYitzhak Mandelbaum };
559ca50e88SYitzhak Mandelbaum 
569ca50e88SYitzhak Mandelbaum template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
579ca50e88SYitzhak Mandelbaum template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
589ca50e88SYitzhak Mandelbaum 
599ca50e88SYitzhak Mandelbaum class ParseError : public llvm::ErrorInfo<ParseError> {
609ca50e88SYitzhak Mandelbaum public:
619ca50e88SYitzhak Mandelbaum   // Required field for all ErrorInfo derivatives.
629ca50e88SYitzhak Mandelbaum   static char ID;
639ca50e88SYitzhak Mandelbaum 
ParseError(size_t Pos,std::string ErrorMsg,std::string InputExcerpt)649ca50e88SYitzhak Mandelbaum   ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
659ca50e88SYitzhak Mandelbaum       : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
669ca50e88SYitzhak Mandelbaum         Excerpt(std::move(InputExcerpt)) {}
679ca50e88SYitzhak Mandelbaum 
log(llvm::raw_ostream & OS) const689ca50e88SYitzhak Mandelbaum   void log(llvm::raw_ostream &OS) const override {
699ca50e88SYitzhak Mandelbaum     OS << "parse error at position (" << Pos << "): " << ErrorMsg
709ca50e88SYitzhak Mandelbaum        << ": " + Excerpt;
719ca50e88SYitzhak Mandelbaum   }
729ca50e88SYitzhak Mandelbaum 
convertToErrorCode() const739ca50e88SYitzhak Mandelbaum   std::error_code convertToErrorCode() const override {
749ca50e88SYitzhak Mandelbaum     return llvm::inconvertibleErrorCode();
759ca50e88SYitzhak Mandelbaum   }
769ca50e88SYitzhak Mandelbaum 
779ca50e88SYitzhak Mandelbaum   // Position of the error in the input string.
789ca50e88SYitzhak Mandelbaum   size_t Pos;
799ca50e88SYitzhak Mandelbaum   std::string ErrorMsg;
809ca50e88SYitzhak Mandelbaum   // Excerpt of the input starting at the error position.
819ca50e88SYitzhak Mandelbaum   std::string Excerpt;
829ca50e88SYitzhak Mandelbaum };
839ca50e88SYitzhak Mandelbaum 
849ca50e88SYitzhak Mandelbaum char ParseError::ID;
859ca50e88SYitzhak Mandelbaum } // namespace
869ca50e88SYitzhak Mandelbaum 
879ca50e88SYitzhak Mandelbaum static const llvm::StringMap<RangeSelectorOp<std::string>> &
getUnaryStringSelectors()889ca50e88SYitzhak Mandelbaum getUnaryStringSelectors() {
899ca50e88SYitzhak Mandelbaum   static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
909ca50e88SYitzhak Mandelbaum       {"name", name},
919ca50e88SYitzhak Mandelbaum       {"node", node},
929ca50e88SYitzhak Mandelbaum       {"statement", statement},
939ca50e88SYitzhak Mandelbaum       {"statements", statements},
949ca50e88SYitzhak Mandelbaum       {"member", member},
959ca50e88SYitzhak Mandelbaum       {"callArgs", callArgs},
969ca50e88SYitzhak Mandelbaum       {"elseBranch", elseBranch},
979ca50e88SYitzhak Mandelbaum       {"initListElements", initListElements}};
989ca50e88SYitzhak Mandelbaum   return M;
999ca50e88SYitzhak Mandelbaum }
1009ca50e88SYitzhak Mandelbaum 
1019ca50e88SYitzhak Mandelbaum static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
getUnaryRangeSelectors()1029ca50e88SYitzhak Mandelbaum getUnaryRangeSelectors() {
1039ca50e88SYitzhak Mandelbaum   static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
1049ca50e88SYitzhak Mandelbaum       {"before", before}, {"after", after}, {"expansion", expansion}};
1059ca50e88SYitzhak Mandelbaum   return M;
1069ca50e88SYitzhak Mandelbaum }
1079ca50e88SYitzhak Mandelbaum 
1089ca50e88SYitzhak Mandelbaum static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
getBinaryStringSelectors()1099ca50e88SYitzhak Mandelbaum getBinaryStringSelectors() {
1109ca50e88SYitzhak Mandelbaum   static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
11104a21318SYitzhak Mandelbaum       {"encloseNodes", encloseNodes}};
1129ca50e88SYitzhak Mandelbaum   return M;
1139ca50e88SYitzhak Mandelbaum }
1149ca50e88SYitzhak Mandelbaum 
1159ca50e88SYitzhak Mandelbaum static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors()1169ca50e88SYitzhak Mandelbaum getBinaryRangeSelectors() {
1179ca50e88SYitzhak Mandelbaum   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
11804a21318SYitzhak Mandelbaum       M = {{"enclose", enclose}, {"between", between}};
1199ca50e88SYitzhak Mandelbaum   return M;
1209ca50e88SYitzhak Mandelbaum }
1219ca50e88SYitzhak Mandelbaum 
1229ca50e88SYitzhak Mandelbaum template <typename Element>
findOptional(const llvm::StringMap<Element> & Map,llvm::StringRef Key)123*6ad0788cSKazu Hirata std::optional<Element> findOptional(const llvm::StringMap<Element> &Map,
1249ca50e88SYitzhak Mandelbaum                                     llvm::StringRef Key) {
1259ca50e88SYitzhak Mandelbaum   auto it = Map.find(Key);
1269ca50e88SYitzhak Mandelbaum   if (it == Map.end())
1275891420eSKazu Hirata     return std::nullopt;
1289ca50e88SYitzhak Mandelbaum   return it->second;
1299ca50e88SYitzhak Mandelbaum }
1309ca50e88SYitzhak Mandelbaum 
1319ca50e88SYitzhak Mandelbaum template <typename ResultType>
makeParseProgress(ParseState State,ResultType Result)1329ca50e88SYitzhak Mandelbaum ParseProgress<ResultType> makeParseProgress(ParseState State,
1339ca50e88SYitzhak Mandelbaum                                             ResultType Result) {
1349ca50e88SYitzhak Mandelbaum   return ParseProgress<ResultType>{State, std::move(Result)};
1359ca50e88SYitzhak Mandelbaum }
1369ca50e88SYitzhak Mandelbaum 
makeParseError(const ParseState & S,std::string ErrorMsg)1379ca50e88SYitzhak Mandelbaum static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
1389ca50e88SYitzhak Mandelbaum   size_t Pos = S.OriginalInput.size() - S.Input.size();
1399ca50e88SYitzhak Mandelbaum   return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
1409ca50e88SYitzhak Mandelbaum                                       S.OriginalInput.substr(Pos, 20).str());
1419ca50e88SYitzhak Mandelbaum }
1429ca50e88SYitzhak Mandelbaum 
1439ca50e88SYitzhak Mandelbaum // Returns a new ParseState that advances \c S by \c N characters.
advance(ParseState S,size_t N)1449ca50e88SYitzhak Mandelbaum static ParseState advance(ParseState S, size_t N) {
1459ca50e88SYitzhak Mandelbaum   S.Input = S.Input.drop_front(N);
1469ca50e88SYitzhak Mandelbaum   return S;
1479ca50e88SYitzhak Mandelbaum }
1489ca50e88SYitzhak Mandelbaum 
consumeWhitespace(StringRef S)1499ca50e88SYitzhak Mandelbaum static StringRef consumeWhitespace(StringRef S) {
150a5cefd95SYitzhak Mandelbaum   return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
1519ca50e88SYitzhak Mandelbaum }
1529ca50e88SYitzhak Mandelbaum 
1539ca50e88SYitzhak Mandelbaum // Parses a single expected character \c c from \c State, skipping preceding
1549ca50e88SYitzhak Mandelbaum // whitespace.  Error if the expected character isn't found.
parseChar(char c,ParseState State)15534bcadc3SKazu Hirata static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) {
1569ca50e88SYitzhak Mandelbaum   State.Input = consumeWhitespace(State.Input);
1579ca50e88SYitzhak Mandelbaum   if (State.Input.empty() || State.Input.front() != c)
1589ca50e88SYitzhak Mandelbaum     return makeParseError(State,
1599ca50e88SYitzhak Mandelbaum                           ("expected char not found: " + llvm::Twine(c)).str());
1605891420eSKazu Hirata   return makeParseProgress(advance(State, 1), std::nullopt);
1619ca50e88SYitzhak Mandelbaum }
1629ca50e88SYitzhak Mandelbaum 
1639ca50e88SYitzhak Mandelbaum // Parses an identitifer "token" -- handles preceding whitespace.
parseId(ParseState State)1649ca50e88SYitzhak Mandelbaum static ExpectedProgress<std::string> parseId(ParseState State) {
1659ca50e88SYitzhak Mandelbaum   State.Input = consumeWhitespace(State.Input);
1669ca50e88SYitzhak Mandelbaum   auto Id = State.Input.take_while(
167601102d2SCorentin Jabot       [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); });
1689ca50e88SYitzhak Mandelbaum   if (Id.empty())
1699ca50e88SYitzhak Mandelbaum     return makeParseError(State, "failed to parse name");
1709ca50e88SYitzhak Mandelbaum   return makeParseProgress(advance(State, Id.size()), Id.str());
1719ca50e88SYitzhak Mandelbaum }
1729ca50e88SYitzhak Mandelbaum 
1739ca50e88SYitzhak Mandelbaum // For consistency with the AST matcher parser and C++ code, node ids are
1749ca50e88SYitzhak Mandelbaum // written as strings. However, we do not support escaping in the string.
parseStringId(ParseState State)1759ca50e88SYitzhak Mandelbaum static ExpectedProgress<std::string> parseStringId(ParseState State) {
1769ca50e88SYitzhak Mandelbaum   State.Input = consumeWhitespace(State.Input);
1779ca50e88SYitzhak Mandelbaum   if (State.Input.empty())
1789ca50e88SYitzhak Mandelbaum     return makeParseError(State, "unexpected end of input");
1799ca50e88SYitzhak Mandelbaum   if (!State.Input.consume_front("\""))
1809ca50e88SYitzhak Mandelbaum     return makeParseError(
1819ca50e88SYitzhak Mandelbaum         State,
1829ca50e88SYitzhak Mandelbaum         "expecting string, but encountered other character or end of input");
1839ca50e88SYitzhak Mandelbaum 
1849ca50e88SYitzhak Mandelbaum   StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
1859ca50e88SYitzhak Mandelbaum   if (State.Input.size() == Id.size())
1869ca50e88SYitzhak Mandelbaum     return makeParseError(State, "unterminated string");
1879ca50e88SYitzhak Mandelbaum   // Advance past the trailing quote as well.
1889ca50e88SYitzhak Mandelbaum   return makeParseProgress(advance(State, Id.size() + 1), Id.str());
1899ca50e88SYitzhak Mandelbaum }
1909ca50e88SYitzhak Mandelbaum 
1919ca50e88SYitzhak Mandelbaum // Parses a single element surrounded by parens. `Op` is applied to the parsed
1929ca50e88SYitzhak Mandelbaum // result to create the result of this function call.
1939ca50e88SYitzhak Mandelbaum template <typename T>
parseSingle(ParseFunction<T> ParseElement,RangeSelectorOp<T> Op,ParseState State)1949ca50e88SYitzhak Mandelbaum ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
1959ca50e88SYitzhak Mandelbaum                                             RangeSelectorOp<T> Op,
1969ca50e88SYitzhak Mandelbaum                                             ParseState State) {
1979ca50e88SYitzhak Mandelbaum   auto P = parseChar('(', State);
1989ca50e88SYitzhak Mandelbaum   if (!P)
1999ca50e88SYitzhak Mandelbaum     return P.takeError();
2009ca50e88SYitzhak Mandelbaum 
2019ca50e88SYitzhak Mandelbaum   auto E = ParseElement(P->State);
2029ca50e88SYitzhak Mandelbaum   if (!E)
2039ca50e88SYitzhak Mandelbaum     return E.takeError();
2049ca50e88SYitzhak Mandelbaum 
2059ca50e88SYitzhak Mandelbaum   P = parseChar(')', E->State);
2069ca50e88SYitzhak Mandelbaum   if (!P)
2079ca50e88SYitzhak Mandelbaum     return P.takeError();
2089ca50e88SYitzhak Mandelbaum 
2099ca50e88SYitzhak Mandelbaum   return makeParseProgress(P->State, Op(std::move(E->Value)));
2109ca50e88SYitzhak Mandelbaum }
2119ca50e88SYitzhak Mandelbaum 
2129ca50e88SYitzhak Mandelbaum // Parses a pair of elements surrounded by parens and separated by comma. `Op`
2139ca50e88SYitzhak Mandelbaum // is applied to the parsed results to create the result of this function call.
2149ca50e88SYitzhak Mandelbaum template <typename T>
parsePair(ParseFunction<T> ParseElement,RangeSelectorOp<T,T> Op,ParseState State)2159ca50e88SYitzhak Mandelbaum ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
2169ca50e88SYitzhak Mandelbaum                                           RangeSelectorOp<T, T> Op,
2179ca50e88SYitzhak Mandelbaum                                           ParseState State) {
2189ca50e88SYitzhak Mandelbaum   auto P = parseChar('(', State);
2199ca50e88SYitzhak Mandelbaum   if (!P)
2209ca50e88SYitzhak Mandelbaum     return P.takeError();
2219ca50e88SYitzhak Mandelbaum 
2229ca50e88SYitzhak Mandelbaum   auto Left = ParseElement(P->State);
2239ca50e88SYitzhak Mandelbaum   if (!Left)
2249ca50e88SYitzhak Mandelbaum     return Left.takeError();
2259ca50e88SYitzhak Mandelbaum 
2269ca50e88SYitzhak Mandelbaum   P = parseChar(',', Left->State);
2279ca50e88SYitzhak Mandelbaum   if (!P)
2289ca50e88SYitzhak Mandelbaum     return P.takeError();
2299ca50e88SYitzhak Mandelbaum 
2309ca50e88SYitzhak Mandelbaum   auto Right = ParseElement(P->State);
2319ca50e88SYitzhak Mandelbaum   if (!Right)
2329ca50e88SYitzhak Mandelbaum     return Right.takeError();
2339ca50e88SYitzhak Mandelbaum 
2349ca50e88SYitzhak Mandelbaum   P = parseChar(')', Right->State);
2359ca50e88SYitzhak Mandelbaum   if (!P)
2369ca50e88SYitzhak Mandelbaum     return P.takeError();
2379ca50e88SYitzhak Mandelbaum 
2389ca50e88SYitzhak Mandelbaum   return makeParseProgress(P->State,
2399ca50e88SYitzhak Mandelbaum                            Op(std::move(Left->Value), std::move(Right->Value)));
2409ca50e88SYitzhak Mandelbaum }
2419ca50e88SYitzhak Mandelbaum 
2429ca50e88SYitzhak Mandelbaum // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
2439ca50e88SYitzhak Mandelbaum // Id operator). Returns StencilType representing the operator on success and
2449ca50e88SYitzhak Mandelbaum // error if it fails to parse input for an operator.
2459ca50e88SYitzhak Mandelbaum static ExpectedProgress<RangeSelector>
parseRangeSelectorImpl(ParseState State)2469ca50e88SYitzhak Mandelbaum parseRangeSelectorImpl(ParseState State) {
2479ca50e88SYitzhak Mandelbaum   auto Id = parseId(State);
2489ca50e88SYitzhak Mandelbaum   if (!Id)
2499ca50e88SYitzhak Mandelbaum     return Id.takeError();
2509ca50e88SYitzhak Mandelbaum 
2519ca50e88SYitzhak Mandelbaum   std::string OpName = std::move(Id->Value);
2529ca50e88SYitzhak Mandelbaum   if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
2539ca50e88SYitzhak Mandelbaum     return parseSingle(parseStringId, *Op, Id->State);
2549ca50e88SYitzhak Mandelbaum 
2559ca50e88SYitzhak Mandelbaum   if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
2569ca50e88SYitzhak Mandelbaum     return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
2579ca50e88SYitzhak Mandelbaum 
2589ca50e88SYitzhak Mandelbaum   if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
2599ca50e88SYitzhak Mandelbaum     return parsePair(parseStringId, *Op, Id->State);
2609ca50e88SYitzhak Mandelbaum 
2619ca50e88SYitzhak Mandelbaum   if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
2629ca50e88SYitzhak Mandelbaum     return parsePair(parseRangeSelectorImpl, *Op, Id->State);
2639ca50e88SYitzhak Mandelbaum 
2649ca50e88SYitzhak Mandelbaum   return makeParseError(State, "unknown selector name: " + OpName);
2659ca50e88SYitzhak Mandelbaum }
2669ca50e88SYitzhak Mandelbaum 
parseRangeSelector(llvm::StringRef Input)2679ca50e88SYitzhak Mandelbaum Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
2689ca50e88SYitzhak Mandelbaum   ParseState State = {Input, Input};
2699ca50e88SYitzhak Mandelbaum   ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
2709ca50e88SYitzhak Mandelbaum   if (!Result)
2719ca50e88SYitzhak Mandelbaum     return Result.takeError();
2729ca50e88SYitzhak Mandelbaum   State = Result->State;
2739ca50e88SYitzhak Mandelbaum   // Discard any potentially trailing whitespace.
2749ca50e88SYitzhak Mandelbaum   State.Input = consumeWhitespace(State.Input);
2759ca50e88SYitzhak Mandelbaum   if (State.Input.empty())
2769ca50e88SYitzhak Mandelbaum     return Result->Value;
2779ca50e88SYitzhak Mandelbaum   return makeParseError(State, "unexpected input after selector");
2789ca50e88SYitzhak Mandelbaum }
279