15ffd83dbSDimitry Andric //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// 25ffd83dbSDimitry Andric // 35ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric // 75ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 85ffd83dbSDimitry Andric 95ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/Parsing.h" 105ffd83dbSDimitry Andric #include "clang/AST/Expr.h" 115ffd83dbSDimitry Andric #include "clang/ASTMatchers/ASTMatchFinder.h" 125ffd83dbSDimitry Andric #include "clang/Basic/CharInfo.h" 135ffd83dbSDimitry Andric #include "clang/Basic/SourceLocation.h" 145ffd83dbSDimitry Andric #include "clang/Lex/Lexer.h" 155ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/RangeSelector.h" 165ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/SourceCode.h" 175ffd83dbSDimitry Andric #include "llvm/ADT/None.h" 185ffd83dbSDimitry Andric #include "llvm/ADT/StringMap.h" 195ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h" 205ffd83dbSDimitry Andric #include "llvm/Support/Errc.h" 215ffd83dbSDimitry Andric #include "llvm/Support/Error.h" 225ffd83dbSDimitry Andric #include <string> 235ffd83dbSDimitry Andric #include <utility> 245ffd83dbSDimitry Andric #include <vector> 255ffd83dbSDimitry Andric 265ffd83dbSDimitry Andric using namespace clang; 275ffd83dbSDimitry Andric using namespace transformer; 285ffd83dbSDimitry Andric 295ffd83dbSDimitry Andric // FIXME: This implementation is entirely separate from that of the AST 305ffd83dbSDimitry Andric // matchers. Given the similarity of the languages and uses of the two parsers, 315ffd83dbSDimitry Andric // the two should share a common parsing infrastructure, as should other 325ffd83dbSDimitry Andric // Transformer types. We intend to unify this implementation soon to share as 335ffd83dbSDimitry Andric // much as possible with the AST Matchers parsing. 345ffd83dbSDimitry Andric 355ffd83dbSDimitry Andric namespace { 365ffd83dbSDimitry Andric using llvm::Error; 375ffd83dbSDimitry Andric using llvm::Expected; 385ffd83dbSDimitry Andric 395ffd83dbSDimitry Andric template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); 405ffd83dbSDimitry Andric 415ffd83dbSDimitry Andric struct ParseState { 425ffd83dbSDimitry Andric // The remaining input to be processed. 435ffd83dbSDimitry Andric StringRef Input; 445ffd83dbSDimitry Andric // The original input. Not modified during parsing; only for reference in 455ffd83dbSDimitry Andric // error reporting. 465ffd83dbSDimitry Andric StringRef OriginalInput; 475ffd83dbSDimitry Andric }; 485ffd83dbSDimitry Andric 495ffd83dbSDimitry Andric // Represents an intermediate result returned by a parsing function. Functions 505ffd83dbSDimitry Andric // that don't generate values should use `llvm::None` 515ffd83dbSDimitry Andric template <typename ResultType> struct ParseProgress { 525ffd83dbSDimitry Andric ParseState State; 535ffd83dbSDimitry Andric // Intermediate result generated by the Parser. 545ffd83dbSDimitry Andric ResultType Value; 555ffd83dbSDimitry Andric }; 565ffd83dbSDimitry Andric 575ffd83dbSDimitry Andric template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; 585ffd83dbSDimitry Andric template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); 595ffd83dbSDimitry Andric 605ffd83dbSDimitry Andric class ParseError : public llvm::ErrorInfo<ParseError> { 615ffd83dbSDimitry Andric public: 625ffd83dbSDimitry Andric // Required field for all ErrorInfo derivatives. 635ffd83dbSDimitry Andric static char ID; 645ffd83dbSDimitry Andric 655ffd83dbSDimitry Andric ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) 665ffd83dbSDimitry Andric : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), 675ffd83dbSDimitry Andric Excerpt(std::move(InputExcerpt)) {} 685ffd83dbSDimitry Andric 695ffd83dbSDimitry Andric void log(llvm::raw_ostream &OS) const override { 705ffd83dbSDimitry Andric OS << "parse error at position (" << Pos << "): " << ErrorMsg 715ffd83dbSDimitry Andric << ": " + Excerpt; 725ffd83dbSDimitry Andric } 735ffd83dbSDimitry Andric 745ffd83dbSDimitry Andric std::error_code convertToErrorCode() const override { 755ffd83dbSDimitry Andric return llvm::inconvertibleErrorCode(); 765ffd83dbSDimitry Andric } 775ffd83dbSDimitry Andric 785ffd83dbSDimitry Andric // Position of the error in the input string. 795ffd83dbSDimitry Andric size_t Pos; 805ffd83dbSDimitry Andric std::string ErrorMsg; 815ffd83dbSDimitry Andric // Excerpt of the input starting at the error position. 825ffd83dbSDimitry Andric std::string Excerpt; 835ffd83dbSDimitry Andric }; 845ffd83dbSDimitry Andric 855ffd83dbSDimitry Andric char ParseError::ID; 865ffd83dbSDimitry Andric } // namespace 875ffd83dbSDimitry Andric 885ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string>> & 895ffd83dbSDimitry Andric getUnaryStringSelectors() { 905ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string>> M = { 915ffd83dbSDimitry Andric {"name", name}, 925ffd83dbSDimitry Andric {"node", node}, 935ffd83dbSDimitry Andric {"statement", statement}, 945ffd83dbSDimitry Andric {"statements", statements}, 955ffd83dbSDimitry Andric {"member", member}, 965ffd83dbSDimitry Andric {"callArgs", callArgs}, 975ffd83dbSDimitry Andric {"elseBranch", elseBranch}, 985ffd83dbSDimitry Andric {"initListElements", initListElements}}; 995ffd83dbSDimitry Andric return M; 1005ffd83dbSDimitry Andric } 1015ffd83dbSDimitry Andric 1025ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & 1035ffd83dbSDimitry Andric getUnaryRangeSelectors() { 1045ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { 1055ffd83dbSDimitry Andric {"before", before}, {"after", after}, {"expansion", expansion}}; 1065ffd83dbSDimitry Andric return M; 1075ffd83dbSDimitry Andric } 1085ffd83dbSDimitry Andric 1095ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & 1105ffd83dbSDimitry Andric getBinaryStringSelectors() { 1115ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { 112*e8d8bef9SDimitry Andric {"encloseNodes", encloseNodes}}; 1135ffd83dbSDimitry Andric return M; 1145ffd83dbSDimitry Andric } 1155ffd83dbSDimitry Andric 1165ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & 1175ffd83dbSDimitry Andric getBinaryRangeSelectors() { 1185ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> 119*e8d8bef9SDimitry Andric M = {{"enclose", enclose}, {"between", between}}; 1205ffd83dbSDimitry Andric return M; 1215ffd83dbSDimitry Andric } 1225ffd83dbSDimitry Andric 1235ffd83dbSDimitry Andric template <typename Element> 1245ffd83dbSDimitry Andric llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map, 1255ffd83dbSDimitry Andric llvm::StringRef Key) { 1265ffd83dbSDimitry Andric auto it = Map.find(Key); 1275ffd83dbSDimitry Andric if (it == Map.end()) 1285ffd83dbSDimitry Andric return llvm::None; 1295ffd83dbSDimitry Andric return it->second; 1305ffd83dbSDimitry Andric } 1315ffd83dbSDimitry Andric 1325ffd83dbSDimitry Andric template <typename ResultType> 1335ffd83dbSDimitry Andric ParseProgress<ResultType> makeParseProgress(ParseState State, 1345ffd83dbSDimitry Andric ResultType Result) { 1355ffd83dbSDimitry Andric return ParseProgress<ResultType>{State, std::move(Result)}; 1365ffd83dbSDimitry Andric } 1375ffd83dbSDimitry Andric 1385ffd83dbSDimitry Andric static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { 1395ffd83dbSDimitry Andric size_t Pos = S.OriginalInput.size() - S.Input.size(); 1405ffd83dbSDimitry Andric return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), 1415ffd83dbSDimitry Andric S.OriginalInput.substr(Pos, 20).str()); 1425ffd83dbSDimitry Andric } 1435ffd83dbSDimitry Andric 1445ffd83dbSDimitry Andric // Returns a new ParseState that advances \c S by \c N characters. 1455ffd83dbSDimitry Andric static ParseState advance(ParseState S, size_t N) { 1465ffd83dbSDimitry Andric S.Input = S.Input.drop_front(N); 1475ffd83dbSDimitry Andric return S; 1485ffd83dbSDimitry Andric } 1495ffd83dbSDimitry Andric 1505ffd83dbSDimitry Andric static StringRef consumeWhitespace(StringRef S) { 151*e8d8bef9SDimitry Andric return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); }); 1525ffd83dbSDimitry Andric } 1535ffd83dbSDimitry Andric 1545ffd83dbSDimitry Andric // Parses a single expected character \c c from \c State, skipping preceding 1555ffd83dbSDimitry Andric // whitespace. Error if the expected character isn't found. 1565ffd83dbSDimitry Andric static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) { 1575ffd83dbSDimitry Andric State.Input = consumeWhitespace(State.Input); 1585ffd83dbSDimitry Andric if (State.Input.empty() || State.Input.front() != c) 1595ffd83dbSDimitry Andric return makeParseError(State, 1605ffd83dbSDimitry Andric ("expected char not found: " + llvm::Twine(c)).str()); 1615ffd83dbSDimitry Andric return makeParseProgress(advance(State, 1), llvm::None); 1625ffd83dbSDimitry Andric } 1635ffd83dbSDimitry Andric 1645ffd83dbSDimitry Andric // Parses an identitifer "token" -- handles preceding whitespace. 1655ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseId(ParseState State) { 1665ffd83dbSDimitry Andric State.Input = consumeWhitespace(State.Input); 1675ffd83dbSDimitry Andric auto Id = State.Input.take_while( 168*e8d8bef9SDimitry Andric [](char c) { return isASCII(c) && isIdentifierBody(c); }); 1695ffd83dbSDimitry Andric if (Id.empty()) 1705ffd83dbSDimitry Andric return makeParseError(State, "failed to parse name"); 1715ffd83dbSDimitry Andric return makeParseProgress(advance(State, Id.size()), Id.str()); 1725ffd83dbSDimitry Andric } 1735ffd83dbSDimitry Andric 1745ffd83dbSDimitry Andric // For consistency with the AST matcher parser and C++ code, node ids are 1755ffd83dbSDimitry Andric // written as strings. However, we do not support escaping in the string. 1765ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseStringId(ParseState State) { 1775ffd83dbSDimitry Andric State.Input = consumeWhitespace(State.Input); 1785ffd83dbSDimitry Andric if (State.Input.empty()) 1795ffd83dbSDimitry Andric return makeParseError(State, "unexpected end of input"); 1805ffd83dbSDimitry Andric if (!State.Input.consume_front("\"")) 1815ffd83dbSDimitry Andric return makeParseError( 1825ffd83dbSDimitry Andric State, 1835ffd83dbSDimitry Andric "expecting string, but encountered other character or end of input"); 1845ffd83dbSDimitry Andric 1855ffd83dbSDimitry Andric StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); 1865ffd83dbSDimitry Andric if (State.Input.size() == Id.size()) 1875ffd83dbSDimitry Andric return makeParseError(State, "unterminated string"); 1885ffd83dbSDimitry Andric // Advance past the trailing quote as well. 1895ffd83dbSDimitry Andric return makeParseProgress(advance(State, Id.size() + 1), Id.str()); 1905ffd83dbSDimitry Andric } 1915ffd83dbSDimitry Andric 1925ffd83dbSDimitry Andric // Parses a single element surrounded by parens. `Op` is applied to the parsed 1935ffd83dbSDimitry Andric // result to create the result of this function call. 1945ffd83dbSDimitry Andric template <typename T> 1955ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, 1965ffd83dbSDimitry Andric RangeSelectorOp<T> Op, 1975ffd83dbSDimitry Andric ParseState State) { 1985ffd83dbSDimitry Andric auto P = parseChar('(', State); 1995ffd83dbSDimitry Andric if (!P) 2005ffd83dbSDimitry Andric return P.takeError(); 2015ffd83dbSDimitry Andric 2025ffd83dbSDimitry Andric auto E = ParseElement(P->State); 2035ffd83dbSDimitry Andric if (!E) 2045ffd83dbSDimitry Andric return E.takeError(); 2055ffd83dbSDimitry Andric 2065ffd83dbSDimitry Andric P = parseChar(')', E->State); 2075ffd83dbSDimitry Andric if (!P) 2085ffd83dbSDimitry Andric return P.takeError(); 2095ffd83dbSDimitry Andric 2105ffd83dbSDimitry Andric return makeParseProgress(P->State, Op(std::move(E->Value))); 2115ffd83dbSDimitry Andric } 2125ffd83dbSDimitry Andric 2135ffd83dbSDimitry Andric // Parses a pair of elements surrounded by parens and separated by comma. `Op` 2145ffd83dbSDimitry Andric // is applied to the parsed results to create the result of this function call. 2155ffd83dbSDimitry Andric template <typename T> 2165ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, 2175ffd83dbSDimitry Andric RangeSelectorOp<T, T> Op, 2185ffd83dbSDimitry Andric ParseState State) { 2195ffd83dbSDimitry Andric auto P = parseChar('(', State); 2205ffd83dbSDimitry Andric if (!P) 2215ffd83dbSDimitry Andric return P.takeError(); 2225ffd83dbSDimitry Andric 2235ffd83dbSDimitry Andric auto Left = ParseElement(P->State); 2245ffd83dbSDimitry Andric if (!Left) 2255ffd83dbSDimitry Andric return Left.takeError(); 2265ffd83dbSDimitry Andric 2275ffd83dbSDimitry Andric P = parseChar(',', Left->State); 2285ffd83dbSDimitry Andric if (!P) 2295ffd83dbSDimitry Andric return P.takeError(); 2305ffd83dbSDimitry Andric 2315ffd83dbSDimitry Andric auto Right = ParseElement(P->State); 2325ffd83dbSDimitry Andric if (!Right) 2335ffd83dbSDimitry Andric return Right.takeError(); 2345ffd83dbSDimitry Andric 2355ffd83dbSDimitry Andric P = parseChar(')', Right->State); 2365ffd83dbSDimitry Andric if (!P) 2375ffd83dbSDimitry Andric return P.takeError(); 2385ffd83dbSDimitry Andric 2395ffd83dbSDimitry Andric return makeParseProgress(P->State, 2405ffd83dbSDimitry Andric Op(std::move(Left->Value), std::move(Right->Value))); 2415ffd83dbSDimitry Andric } 2425ffd83dbSDimitry Andric 2435ffd83dbSDimitry Andric // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or 2445ffd83dbSDimitry Andric // Id operator). Returns StencilType representing the operator on success and 2455ffd83dbSDimitry Andric // error if it fails to parse input for an operator. 2465ffd83dbSDimitry Andric static ExpectedProgress<RangeSelector> 2475ffd83dbSDimitry Andric parseRangeSelectorImpl(ParseState State) { 2485ffd83dbSDimitry Andric auto Id = parseId(State); 2495ffd83dbSDimitry Andric if (!Id) 2505ffd83dbSDimitry Andric return Id.takeError(); 2515ffd83dbSDimitry Andric 2525ffd83dbSDimitry Andric std::string OpName = std::move(Id->Value); 2535ffd83dbSDimitry Andric if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) 2545ffd83dbSDimitry Andric return parseSingle(parseStringId, *Op, Id->State); 2555ffd83dbSDimitry Andric 2565ffd83dbSDimitry Andric if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) 2575ffd83dbSDimitry Andric return parseSingle(parseRangeSelectorImpl, *Op, Id->State); 2585ffd83dbSDimitry Andric 2595ffd83dbSDimitry Andric if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) 2605ffd83dbSDimitry Andric return parsePair(parseStringId, *Op, Id->State); 2615ffd83dbSDimitry Andric 2625ffd83dbSDimitry Andric if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) 2635ffd83dbSDimitry Andric return parsePair(parseRangeSelectorImpl, *Op, Id->State); 2645ffd83dbSDimitry Andric 2655ffd83dbSDimitry Andric return makeParseError(State, "unknown selector name: " + OpName); 2665ffd83dbSDimitry Andric } 2675ffd83dbSDimitry Andric 2685ffd83dbSDimitry Andric Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { 2695ffd83dbSDimitry Andric ParseState State = {Input, Input}; 2705ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); 2715ffd83dbSDimitry Andric if (!Result) 2725ffd83dbSDimitry Andric return Result.takeError(); 2735ffd83dbSDimitry Andric State = Result->State; 2745ffd83dbSDimitry Andric // Discard any potentially trailing whitespace. 2755ffd83dbSDimitry Andric State.Input = consumeWhitespace(State.Input); 2765ffd83dbSDimitry Andric if (State.Input.empty()) 2775ffd83dbSDimitry Andric return Result->Value; 2785ffd83dbSDimitry Andric return makeParseError(State, "unexpected input after selector"); 2795ffd83dbSDimitry Andric } 280