xref: /freebsd-src/contrib/llvm-project/clang/lib/Tooling/Transformer/Parsing.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
15ffd83dbSDimitry Andric //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
25ffd83dbSDimitry Andric //
35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric //
75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===//
85ffd83dbSDimitry Andric 
95ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/Parsing.h"
105ffd83dbSDimitry Andric #include "clang/AST/Expr.h"
115ffd83dbSDimitry Andric #include "clang/ASTMatchers/ASTMatchFinder.h"
125ffd83dbSDimitry Andric #include "clang/Basic/CharInfo.h"
135ffd83dbSDimitry Andric #include "clang/Basic/SourceLocation.h"
145ffd83dbSDimitry Andric #include "clang/Lex/Lexer.h"
155ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/RangeSelector.h"
165ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/SourceCode.h"
175ffd83dbSDimitry Andric #include "llvm/ADT/None.h"
185ffd83dbSDimitry Andric #include "llvm/ADT/StringMap.h"
195ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h"
205ffd83dbSDimitry Andric #include "llvm/Support/Errc.h"
215ffd83dbSDimitry Andric #include "llvm/Support/Error.h"
225ffd83dbSDimitry Andric #include <string>
235ffd83dbSDimitry Andric #include <utility>
245ffd83dbSDimitry Andric #include <vector>
255ffd83dbSDimitry Andric 
265ffd83dbSDimitry Andric using namespace clang;
275ffd83dbSDimitry Andric using namespace transformer;
285ffd83dbSDimitry Andric 
295ffd83dbSDimitry Andric // FIXME: This implementation is entirely separate from that of the AST
305ffd83dbSDimitry Andric // matchers. Given the similarity of the languages and uses of the two parsers,
315ffd83dbSDimitry Andric // the two should share a common parsing infrastructure, as should other
325ffd83dbSDimitry Andric // Transformer types. We intend to unify this implementation soon to share as
335ffd83dbSDimitry Andric // much as possible with the AST Matchers parsing.
345ffd83dbSDimitry Andric 
355ffd83dbSDimitry Andric namespace {
365ffd83dbSDimitry Andric using llvm::Error;
375ffd83dbSDimitry Andric using llvm::Expected;
385ffd83dbSDimitry Andric 
395ffd83dbSDimitry Andric template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);
405ffd83dbSDimitry Andric 
415ffd83dbSDimitry Andric struct ParseState {
425ffd83dbSDimitry Andric   // The remaining input to be processed.
435ffd83dbSDimitry Andric   StringRef Input;
445ffd83dbSDimitry Andric   // The original input. Not modified during parsing; only for reference in
455ffd83dbSDimitry Andric   // error reporting.
465ffd83dbSDimitry Andric   StringRef OriginalInput;
475ffd83dbSDimitry Andric };
485ffd83dbSDimitry Andric 
495ffd83dbSDimitry Andric // Represents an intermediate result returned by a parsing function. Functions
505ffd83dbSDimitry Andric // that don't generate values should use `llvm::None`
515ffd83dbSDimitry Andric template <typename ResultType> struct ParseProgress {
525ffd83dbSDimitry Andric   ParseState State;
535ffd83dbSDimitry Andric   // Intermediate result generated by the Parser.
545ffd83dbSDimitry Andric   ResultType Value;
555ffd83dbSDimitry Andric };
565ffd83dbSDimitry Andric 
575ffd83dbSDimitry Andric template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
585ffd83dbSDimitry Andric template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);
595ffd83dbSDimitry Andric 
605ffd83dbSDimitry Andric class ParseError : public llvm::ErrorInfo<ParseError> {
615ffd83dbSDimitry Andric public:
625ffd83dbSDimitry Andric   // Required field for all ErrorInfo derivatives.
635ffd83dbSDimitry Andric   static char ID;
645ffd83dbSDimitry Andric 
655ffd83dbSDimitry Andric   ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
665ffd83dbSDimitry Andric       : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
675ffd83dbSDimitry Andric         Excerpt(std::move(InputExcerpt)) {}
685ffd83dbSDimitry Andric 
695ffd83dbSDimitry Andric   void log(llvm::raw_ostream &OS) const override {
705ffd83dbSDimitry Andric     OS << "parse error at position (" << Pos << "): " << ErrorMsg
715ffd83dbSDimitry Andric        << ": " + Excerpt;
725ffd83dbSDimitry Andric   }
735ffd83dbSDimitry Andric 
745ffd83dbSDimitry Andric   std::error_code convertToErrorCode() const override {
755ffd83dbSDimitry Andric     return llvm::inconvertibleErrorCode();
765ffd83dbSDimitry Andric   }
775ffd83dbSDimitry Andric 
785ffd83dbSDimitry Andric   // Position of the error in the input string.
795ffd83dbSDimitry Andric   size_t Pos;
805ffd83dbSDimitry Andric   std::string ErrorMsg;
815ffd83dbSDimitry Andric   // Excerpt of the input starting at the error position.
825ffd83dbSDimitry Andric   std::string Excerpt;
835ffd83dbSDimitry Andric };
845ffd83dbSDimitry Andric 
855ffd83dbSDimitry Andric char ParseError::ID;
865ffd83dbSDimitry Andric } // namespace
875ffd83dbSDimitry Andric 
885ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string>> &
895ffd83dbSDimitry Andric getUnaryStringSelectors() {
905ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
915ffd83dbSDimitry Andric       {"name", name},
925ffd83dbSDimitry Andric       {"node", node},
935ffd83dbSDimitry Andric       {"statement", statement},
945ffd83dbSDimitry Andric       {"statements", statements},
955ffd83dbSDimitry Andric       {"member", member},
965ffd83dbSDimitry Andric       {"callArgs", callArgs},
975ffd83dbSDimitry Andric       {"elseBranch", elseBranch},
985ffd83dbSDimitry Andric       {"initListElements", initListElements}};
995ffd83dbSDimitry Andric   return M;
1005ffd83dbSDimitry Andric }
1015ffd83dbSDimitry Andric 
1025ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
1035ffd83dbSDimitry Andric getUnaryRangeSelectors() {
1045ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
1055ffd83dbSDimitry Andric       {"before", before}, {"after", after}, {"expansion", expansion}};
1065ffd83dbSDimitry Andric   return M;
1075ffd83dbSDimitry Andric }
1085ffd83dbSDimitry Andric 
1095ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
1105ffd83dbSDimitry Andric getBinaryStringSelectors() {
1115ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
112*e8d8bef9SDimitry Andric       {"encloseNodes", encloseNodes}};
1135ffd83dbSDimitry Andric   return M;
1145ffd83dbSDimitry Andric }
1155ffd83dbSDimitry Andric 
1165ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
1175ffd83dbSDimitry Andric getBinaryRangeSelectors() {
1185ffd83dbSDimitry Andric   static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
119*e8d8bef9SDimitry Andric       M = {{"enclose", enclose}, {"between", between}};
1205ffd83dbSDimitry Andric   return M;
1215ffd83dbSDimitry Andric }
1225ffd83dbSDimitry Andric 
1235ffd83dbSDimitry Andric template <typename Element>
1245ffd83dbSDimitry Andric llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
1255ffd83dbSDimitry Andric                                      llvm::StringRef Key) {
1265ffd83dbSDimitry Andric   auto it = Map.find(Key);
1275ffd83dbSDimitry Andric   if (it == Map.end())
1285ffd83dbSDimitry Andric     return llvm::None;
1295ffd83dbSDimitry Andric   return it->second;
1305ffd83dbSDimitry Andric }
1315ffd83dbSDimitry Andric 
1325ffd83dbSDimitry Andric template <typename ResultType>
1335ffd83dbSDimitry Andric ParseProgress<ResultType> makeParseProgress(ParseState State,
1345ffd83dbSDimitry Andric                                             ResultType Result) {
1355ffd83dbSDimitry Andric   return ParseProgress<ResultType>{State, std::move(Result)};
1365ffd83dbSDimitry Andric }
1375ffd83dbSDimitry Andric 
1385ffd83dbSDimitry Andric static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
1395ffd83dbSDimitry Andric   size_t Pos = S.OriginalInput.size() - S.Input.size();
1405ffd83dbSDimitry Andric   return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
1415ffd83dbSDimitry Andric                                       S.OriginalInput.substr(Pos, 20).str());
1425ffd83dbSDimitry Andric }
1435ffd83dbSDimitry Andric 
1445ffd83dbSDimitry Andric // Returns a new ParseState that advances \c S by \c N characters.
1455ffd83dbSDimitry Andric static ParseState advance(ParseState S, size_t N) {
1465ffd83dbSDimitry Andric   S.Input = S.Input.drop_front(N);
1475ffd83dbSDimitry Andric   return S;
1485ffd83dbSDimitry Andric }
1495ffd83dbSDimitry Andric 
1505ffd83dbSDimitry Andric static StringRef consumeWhitespace(StringRef S) {
151*e8d8bef9SDimitry Andric   return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
1525ffd83dbSDimitry Andric }
1535ffd83dbSDimitry Andric 
1545ffd83dbSDimitry Andric // Parses a single expected character \c c from \c State, skipping preceding
1555ffd83dbSDimitry Andric // whitespace.  Error if the expected character isn't found.
1565ffd83dbSDimitry Andric static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
1575ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
1585ffd83dbSDimitry Andric   if (State.Input.empty() || State.Input.front() != c)
1595ffd83dbSDimitry Andric     return makeParseError(State,
1605ffd83dbSDimitry Andric                           ("expected char not found: " + llvm::Twine(c)).str());
1615ffd83dbSDimitry Andric   return makeParseProgress(advance(State, 1), llvm::None);
1625ffd83dbSDimitry Andric }
1635ffd83dbSDimitry Andric 
1645ffd83dbSDimitry Andric // Parses an identitifer "token" -- handles preceding whitespace.
1655ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseId(ParseState State) {
1665ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
1675ffd83dbSDimitry Andric   auto Id = State.Input.take_while(
168*e8d8bef9SDimitry Andric       [](char c) { return isASCII(c) && isIdentifierBody(c); });
1695ffd83dbSDimitry Andric   if (Id.empty())
1705ffd83dbSDimitry Andric     return makeParseError(State, "failed to parse name");
1715ffd83dbSDimitry Andric   return makeParseProgress(advance(State, Id.size()), Id.str());
1725ffd83dbSDimitry Andric }
1735ffd83dbSDimitry Andric 
1745ffd83dbSDimitry Andric // For consistency with the AST matcher parser and C++ code, node ids are
1755ffd83dbSDimitry Andric // written as strings. However, we do not support escaping in the string.
1765ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseStringId(ParseState State) {
1775ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
1785ffd83dbSDimitry Andric   if (State.Input.empty())
1795ffd83dbSDimitry Andric     return makeParseError(State, "unexpected end of input");
1805ffd83dbSDimitry Andric   if (!State.Input.consume_front("\""))
1815ffd83dbSDimitry Andric     return makeParseError(
1825ffd83dbSDimitry Andric         State,
1835ffd83dbSDimitry Andric         "expecting string, but encountered other character or end of input");
1845ffd83dbSDimitry Andric 
1855ffd83dbSDimitry Andric   StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
1865ffd83dbSDimitry Andric   if (State.Input.size() == Id.size())
1875ffd83dbSDimitry Andric     return makeParseError(State, "unterminated string");
1885ffd83dbSDimitry Andric   // Advance past the trailing quote as well.
1895ffd83dbSDimitry Andric   return makeParseProgress(advance(State, Id.size() + 1), Id.str());
1905ffd83dbSDimitry Andric }
1915ffd83dbSDimitry Andric 
1925ffd83dbSDimitry Andric // Parses a single element surrounded by parens. `Op` is applied to the parsed
1935ffd83dbSDimitry Andric // result to create the result of this function call.
1945ffd83dbSDimitry Andric template <typename T>
1955ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
1965ffd83dbSDimitry Andric                                             RangeSelectorOp<T> Op,
1975ffd83dbSDimitry Andric                                             ParseState State) {
1985ffd83dbSDimitry Andric   auto P = parseChar('(', State);
1995ffd83dbSDimitry Andric   if (!P)
2005ffd83dbSDimitry Andric     return P.takeError();
2015ffd83dbSDimitry Andric 
2025ffd83dbSDimitry Andric   auto E = ParseElement(P->State);
2035ffd83dbSDimitry Andric   if (!E)
2045ffd83dbSDimitry Andric     return E.takeError();
2055ffd83dbSDimitry Andric 
2065ffd83dbSDimitry Andric   P = parseChar(')', E->State);
2075ffd83dbSDimitry Andric   if (!P)
2085ffd83dbSDimitry Andric     return P.takeError();
2095ffd83dbSDimitry Andric 
2105ffd83dbSDimitry Andric   return makeParseProgress(P->State, Op(std::move(E->Value)));
2115ffd83dbSDimitry Andric }
2125ffd83dbSDimitry Andric 
2135ffd83dbSDimitry Andric // Parses a pair of elements surrounded by parens and separated by comma. `Op`
2145ffd83dbSDimitry Andric // is applied to the parsed results to create the result of this function call.
2155ffd83dbSDimitry Andric template <typename T>
2165ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
2175ffd83dbSDimitry Andric                                           RangeSelectorOp<T, T> Op,
2185ffd83dbSDimitry Andric                                           ParseState State) {
2195ffd83dbSDimitry Andric   auto P = parseChar('(', State);
2205ffd83dbSDimitry Andric   if (!P)
2215ffd83dbSDimitry Andric     return P.takeError();
2225ffd83dbSDimitry Andric 
2235ffd83dbSDimitry Andric   auto Left = ParseElement(P->State);
2245ffd83dbSDimitry Andric   if (!Left)
2255ffd83dbSDimitry Andric     return Left.takeError();
2265ffd83dbSDimitry Andric 
2275ffd83dbSDimitry Andric   P = parseChar(',', Left->State);
2285ffd83dbSDimitry Andric   if (!P)
2295ffd83dbSDimitry Andric     return P.takeError();
2305ffd83dbSDimitry Andric 
2315ffd83dbSDimitry Andric   auto Right = ParseElement(P->State);
2325ffd83dbSDimitry Andric   if (!Right)
2335ffd83dbSDimitry Andric     return Right.takeError();
2345ffd83dbSDimitry Andric 
2355ffd83dbSDimitry Andric   P = parseChar(')', Right->State);
2365ffd83dbSDimitry Andric   if (!P)
2375ffd83dbSDimitry Andric     return P.takeError();
2385ffd83dbSDimitry Andric 
2395ffd83dbSDimitry Andric   return makeParseProgress(P->State,
2405ffd83dbSDimitry Andric                            Op(std::move(Left->Value), std::move(Right->Value)));
2415ffd83dbSDimitry Andric }
2425ffd83dbSDimitry Andric 
2435ffd83dbSDimitry Andric // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
2445ffd83dbSDimitry Andric // Id operator). Returns StencilType representing the operator on success and
2455ffd83dbSDimitry Andric // error if it fails to parse input for an operator.
2465ffd83dbSDimitry Andric static ExpectedProgress<RangeSelector>
2475ffd83dbSDimitry Andric parseRangeSelectorImpl(ParseState State) {
2485ffd83dbSDimitry Andric   auto Id = parseId(State);
2495ffd83dbSDimitry Andric   if (!Id)
2505ffd83dbSDimitry Andric     return Id.takeError();
2515ffd83dbSDimitry Andric 
2525ffd83dbSDimitry Andric   std::string OpName = std::move(Id->Value);
2535ffd83dbSDimitry Andric   if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
2545ffd83dbSDimitry Andric     return parseSingle(parseStringId, *Op, Id->State);
2555ffd83dbSDimitry Andric 
2565ffd83dbSDimitry Andric   if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
2575ffd83dbSDimitry Andric     return parseSingle(parseRangeSelectorImpl, *Op, Id->State);
2585ffd83dbSDimitry Andric 
2595ffd83dbSDimitry Andric   if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
2605ffd83dbSDimitry Andric     return parsePair(parseStringId, *Op, Id->State);
2615ffd83dbSDimitry Andric 
2625ffd83dbSDimitry Andric   if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
2635ffd83dbSDimitry Andric     return parsePair(parseRangeSelectorImpl, *Op, Id->State);
2645ffd83dbSDimitry Andric 
2655ffd83dbSDimitry Andric   return makeParseError(State, "unknown selector name: " + OpName);
2665ffd83dbSDimitry Andric }
2675ffd83dbSDimitry Andric 
2685ffd83dbSDimitry Andric Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
2695ffd83dbSDimitry Andric   ParseState State = {Input, Input};
2705ffd83dbSDimitry Andric   ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
2715ffd83dbSDimitry Andric   if (!Result)
2725ffd83dbSDimitry Andric     return Result.takeError();
2735ffd83dbSDimitry Andric   State = Result->State;
2745ffd83dbSDimitry Andric   // Discard any potentially trailing whitespace.
2755ffd83dbSDimitry Andric   State.Input = consumeWhitespace(State.Input);
2765ffd83dbSDimitry Andric   if (State.Input.empty())
2775ffd83dbSDimitry Andric     return Result->Value;
2785ffd83dbSDimitry Andric   return makeParseError(State, "unexpected input after selector");
2795ffd83dbSDimitry Andric }
280