1*5ffd83dbSDimitry Andric //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// 2*5ffd83dbSDimitry Andric // 3*5ffd83dbSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*5ffd83dbSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*5ffd83dbSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*5ffd83dbSDimitry Andric // 7*5ffd83dbSDimitry Andric //===----------------------------------------------------------------------===// 8*5ffd83dbSDimitry Andric 9*5ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/Parsing.h" 10*5ffd83dbSDimitry Andric #include "clang/AST/Expr.h" 11*5ffd83dbSDimitry Andric #include "clang/ASTMatchers/ASTMatchFinder.h" 12*5ffd83dbSDimitry Andric #include "clang/Basic/CharInfo.h" 13*5ffd83dbSDimitry Andric #include "clang/Basic/SourceLocation.h" 14*5ffd83dbSDimitry Andric #include "clang/Lex/Lexer.h" 15*5ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/RangeSelector.h" 16*5ffd83dbSDimitry Andric #include "clang/Tooling/Transformer/SourceCode.h" 17*5ffd83dbSDimitry Andric #include "llvm/ADT/None.h" 18*5ffd83dbSDimitry Andric #include "llvm/ADT/StringMap.h" 19*5ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h" 20*5ffd83dbSDimitry Andric #include "llvm/Support/Errc.h" 21*5ffd83dbSDimitry Andric #include "llvm/Support/Error.h" 22*5ffd83dbSDimitry Andric #include <string> 23*5ffd83dbSDimitry Andric #include <utility> 24*5ffd83dbSDimitry Andric #include <vector> 25*5ffd83dbSDimitry Andric 26*5ffd83dbSDimitry Andric using namespace clang; 27*5ffd83dbSDimitry Andric using namespace transformer; 28*5ffd83dbSDimitry Andric 29*5ffd83dbSDimitry Andric // FIXME: This implementation is entirely separate from that of the AST 30*5ffd83dbSDimitry Andric // matchers. Given the similarity of the languages and uses of the two parsers, 31*5ffd83dbSDimitry Andric // the two should share a common parsing infrastructure, as should other 32*5ffd83dbSDimitry Andric // Transformer types. We intend to unify this implementation soon to share as 33*5ffd83dbSDimitry Andric // much as possible with the AST Matchers parsing. 34*5ffd83dbSDimitry Andric 35*5ffd83dbSDimitry Andric namespace { 36*5ffd83dbSDimitry Andric using llvm::Error; 37*5ffd83dbSDimitry Andric using llvm::Expected; 38*5ffd83dbSDimitry Andric 39*5ffd83dbSDimitry Andric template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); 40*5ffd83dbSDimitry Andric 41*5ffd83dbSDimitry Andric struct ParseState { 42*5ffd83dbSDimitry Andric // The remaining input to be processed. 43*5ffd83dbSDimitry Andric StringRef Input; 44*5ffd83dbSDimitry Andric // The original input. Not modified during parsing; only for reference in 45*5ffd83dbSDimitry Andric // error reporting. 46*5ffd83dbSDimitry Andric StringRef OriginalInput; 47*5ffd83dbSDimitry Andric }; 48*5ffd83dbSDimitry Andric 49*5ffd83dbSDimitry Andric // Represents an intermediate result returned by a parsing function. Functions 50*5ffd83dbSDimitry Andric // that don't generate values should use `llvm::None` 51*5ffd83dbSDimitry Andric template <typename ResultType> struct ParseProgress { 52*5ffd83dbSDimitry Andric ParseState State; 53*5ffd83dbSDimitry Andric // Intermediate result generated by the Parser. 54*5ffd83dbSDimitry Andric ResultType Value; 55*5ffd83dbSDimitry Andric }; 56*5ffd83dbSDimitry Andric 57*5ffd83dbSDimitry Andric template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; 58*5ffd83dbSDimitry Andric template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); 59*5ffd83dbSDimitry Andric 60*5ffd83dbSDimitry Andric class ParseError : public llvm::ErrorInfo<ParseError> { 61*5ffd83dbSDimitry Andric public: 62*5ffd83dbSDimitry Andric // Required field for all ErrorInfo derivatives. 63*5ffd83dbSDimitry Andric static char ID; 64*5ffd83dbSDimitry Andric 65*5ffd83dbSDimitry Andric ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) 66*5ffd83dbSDimitry Andric : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), 67*5ffd83dbSDimitry Andric Excerpt(std::move(InputExcerpt)) {} 68*5ffd83dbSDimitry Andric 69*5ffd83dbSDimitry Andric void log(llvm::raw_ostream &OS) const override { 70*5ffd83dbSDimitry Andric OS << "parse error at position (" << Pos << "): " << ErrorMsg 71*5ffd83dbSDimitry Andric << ": " + Excerpt; 72*5ffd83dbSDimitry Andric } 73*5ffd83dbSDimitry Andric 74*5ffd83dbSDimitry Andric std::error_code convertToErrorCode() const override { 75*5ffd83dbSDimitry Andric return llvm::inconvertibleErrorCode(); 76*5ffd83dbSDimitry Andric } 77*5ffd83dbSDimitry Andric 78*5ffd83dbSDimitry Andric // Position of the error in the input string. 79*5ffd83dbSDimitry Andric size_t Pos; 80*5ffd83dbSDimitry Andric std::string ErrorMsg; 81*5ffd83dbSDimitry Andric // Excerpt of the input starting at the error position. 82*5ffd83dbSDimitry Andric std::string Excerpt; 83*5ffd83dbSDimitry Andric }; 84*5ffd83dbSDimitry Andric 85*5ffd83dbSDimitry Andric char ParseError::ID; 86*5ffd83dbSDimitry Andric } // namespace 87*5ffd83dbSDimitry Andric 88*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string>> & 89*5ffd83dbSDimitry Andric getUnaryStringSelectors() { 90*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string>> M = { 91*5ffd83dbSDimitry Andric {"name", name}, 92*5ffd83dbSDimitry Andric {"node", node}, 93*5ffd83dbSDimitry Andric {"statement", statement}, 94*5ffd83dbSDimitry Andric {"statements", statements}, 95*5ffd83dbSDimitry Andric {"member", member}, 96*5ffd83dbSDimitry Andric {"callArgs", callArgs}, 97*5ffd83dbSDimitry Andric {"elseBranch", elseBranch}, 98*5ffd83dbSDimitry Andric {"initListElements", initListElements}}; 99*5ffd83dbSDimitry Andric return M; 100*5ffd83dbSDimitry Andric } 101*5ffd83dbSDimitry Andric 102*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & 103*5ffd83dbSDimitry Andric getUnaryRangeSelectors() { 104*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { 105*5ffd83dbSDimitry Andric {"before", before}, {"after", after}, {"expansion", expansion}}; 106*5ffd83dbSDimitry Andric return M; 107*5ffd83dbSDimitry Andric } 108*5ffd83dbSDimitry Andric 109*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & 110*5ffd83dbSDimitry Andric getBinaryStringSelectors() { 111*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { 112*5ffd83dbSDimitry Andric {"encloseNodes", range}}; 113*5ffd83dbSDimitry Andric return M; 114*5ffd83dbSDimitry Andric } 115*5ffd83dbSDimitry Andric 116*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & 117*5ffd83dbSDimitry Andric getBinaryRangeSelectors() { 118*5ffd83dbSDimitry Andric static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> 119*5ffd83dbSDimitry Andric M = {{"enclose", range}}; 120*5ffd83dbSDimitry Andric return M; 121*5ffd83dbSDimitry Andric } 122*5ffd83dbSDimitry Andric 123*5ffd83dbSDimitry Andric template <typename Element> 124*5ffd83dbSDimitry Andric llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map, 125*5ffd83dbSDimitry Andric llvm::StringRef Key) { 126*5ffd83dbSDimitry Andric auto it = Map.find(Key); 127*5ffd83dbSDimitry Andric if (it == Map.end()) 128*5ffd83dbSDimitry Andric return llvm::None; 129*5ffd83dbSDimitry Andric return it->second; 130*5ffd83dbSDimitry Andric } 131*5ffd83dbSDimitry Andric 132*5ffd83dbSDimitry Andric template <typename ResultType> 133*5ffd83dbSDimitry Andric ParseProgress<ResultType> makeParseProgress(ParseState State, 134*5ffd83dbSDimitry Andric ResultType Result) { 135*5ffd83dbSDimitry Andric return ParseProgress<ResultType>{State, std::move(Result)}; 136*5ffd83dbSDimitry Andric } 137*5ffd83dbSDimitry Andric 138*5ffd83dbSDimitry Andric static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { 139*5ffd83dbSDimitry Andric size_t Pos = S.OriginalInput.size() - S.Input.size(); 140*5ffd83dbSDimitry Andric return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), 141*5ffd83dbSDimitry Andric S.OriginalInput.substr(Pos, 20).str()); 142*5ffd83dbSDimitry Andric } 143*5ffd83dbSDimitry Andric 144*5ffd83dbSDimitry Andric // Returns a new ParseState that advances \c S by \c N characters. 145*5ffd83dbSDimitry Andric static ParseState advance(ParseState S, size_t N) { 146*5ffd83dbSDimitry Andric S.Input = S.Input.drop_front(N); 147*5ffd83dbSDimitry Andric return S; 148*5ffd83dbSDimitry Andric } 149*5ffd83dbSDimitry Andric 150*5ffd83dbSDimitry Andric static StringRef consumeWhitespace(StringRef S) { 151*5ffd83dbSDimitry Andric return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); }); 152*5ffd83dbSDimitry Andric } 153*5ffd83dbSDimitry Andric 154*5ffd83dbSDimitry Andric // Parses a single expected character \c c from \c State, skipping preceding 155*5ffd83dbSDimitry Andric // whitespace. Error if the expected character isn't found. 156*5ffd83dbSDimitry Andric static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) { 157*5ffd83dbSDimitry Andric State.Input = consumeWhitespace(State.Input); 158*5ffd83dbSDimitry Andric if (State.Input.empty() || State.Input.front() != c) 159*5ffd83dbSDimitry Andric return makeParseError(State, 160*5ffd83dbSDimitry Andric ("expected char not found: " + llvm::Twine(c)).str()); 161*5ffd83dbSDimitry Andric return makeParseProgress(advance(State, 1), llvm::None); 162*5ffd83dbSDimitry Andric } 163*5ffd83dbSDimitry Andric 164*5ffd83dbSDimitry Andric // Parses an identitifer "token" -- handles preceding whitespace. 165*5ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseId(ParseState State) { 166*5ffd83dbSDimitry Andric State.Input = consumeWhitespace(State.Input); 167*5ffd83dbSDimitry Andric auto Id = State.Input.take_while( 168*5ffd83dbSDimitry Andric [](char c) { return c >= 0 && isIdentifierBody(c); }); 169*5ffd83dbSDimitry Andric if (Id.empty()) 170*5ffd83dbSDimitry Andric return makeParseError(State, "failed to parse name"); 171*5ffd83dbSDimitry Andric return makeParseProgress(advance(State, Id.size()), Id.str()); 172*5ffd83dbSDimitry Andric } 173*5ffd83dbSDimitry Andric 174*5ffd83dbSDimitry Andric // For consistency with the AST matcher parser and C++ code, node ids are 175*5ffd83dbSDimitry Andric // written as strings. However, we do not support escaping in the string. 176*5ffd83dbSDimitry Andric static ExpectedProgress<std::string> parseStringId(ParseState State) { 177*5ffd83dbSDimitry Andric State.Input = consumeWhitespace(State.Input); 178*5ffd83dbSDimitry Andric if (State.Input.empty()) 179*5ffd83dbSDimitry Andric return makeParseError(State, "unexpected end of input"); 180*5ffd83dbSDimitry Andric if (!State.Input.consume_front("\"")) 181*5ffd83dbSDimitry Andric return makeParseError( 182*5ffd83dbSDimitry Andric State, 183*5ffd83dbSDimitry Andric "expecting string, but encountered other character or end of input"); 184*5ffd83dbSDimitry Andric 185*5ffd83dbSDimitry Andric StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); 186*5ffd83dbSDimitry Andric if (State.Input.size() == Id.size()) 187*5ffd83dbSDimitry Andric return makeParseError(State, "unterminated string"); 188*5ffd83dbSDimitry Andric // Advance past the trailing quote as well. 189*5ffd83dbSDimitry Andric return makeParseProgress(advance(State, Id.size() + 1), Id.str()); 190*5ffd83dbSDimitry Andric } 191*5ffd83dbSDimitry Andric 192*5ffd83dbSDimitry Andric // Parses a single element surrounded by parens. `Op` is applied to the parsed 193*5ffd83dbSDimitry Andric // result to create the result of this function call. 194*5ffd83dbSDimitry Andric template <typename T> 195*5ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, 196*5ffd83dbSDimitry Andric RangeSelectorOp<T> Op, 197*5ffd83dbSDimitry Andric ParseState State) { 198*5ffd83dbSDimitry Andric auto P = parseChar('(', State); 199*5ffd83dbSDimitry Andric if (!P) 200*5ffd83dbSDimitry Andric return P.takeError(); 201*5ffd83dbSDimitry Andric 202*5ffd83dbSDimitry Andric auto E = ParseElement(P->State); 203*5ffd83dbSDimitry Andric if (!E) 204*5ffd83dbSDimitry Andric return E.takeError(); 205*5ffd83dbSDimitry Andric 206*5ffd83dbSDimitry Andric P = parseChar(')', E->State); 207*5ffd83dbSDimitry Andric if (!P) 208*5ffd83dbSDimitry Andric return P.takeError(); 209*5ffd83dbSDimitry Andric 210*5ffd83dbSDimitry Andric return makeParseProgress(P->State, Op(std::move(E->Value))); 211*5ffd83dbSDimitry Andric } 212*5ffd83dbSDimitry Andric 213*5ffd83dbSDimitry Andric // Parses a pair of elements surrounded by parens and separated by comma. `Op` 214*5ffd83dbSDimitry Andric // is applied to the parsed results to create the result of this function call. 215*5ffd83dbSDimitry Andric template <typename T> 216*5ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, 217*5ffd83dbSDimitry Andric RangeSelectorOp<T, T> Op, 218*5ffd83dbSDimitry Andric ParseState State) { 219*5ffd83dbSDimitry Andric auto P = parseChar('(', State); 220*5ffd83dbSDimitry Andric if (!P) 221*5ffd83dbSDimitry Andric return P.takeError(); 222*5ffd83dbSDimitry Andric 223*5ffd83dbSDimitry Andric auto Left = ParseElement(P->State); 224*5ffd83dbSDimitry Andric if (!Left) 225*5ffd83dbSDimitry Andric return Left.takeError(); 226*5ffd83dbSDimitry Andric 227*5ffd83dbSDimitry Andric P = parseChar(',', Left->State); 228*5ffd83dbSDimitry Andric if (!P) 229*5ffd83dbSDimitry Andric return P.takeError(); 230*5ffd83dbSDimitry Andric 231*5ffd83dbSDimitry Andric auto Right = ParseElement(P->State); 232*5ffd83dbSDimitry Andric if (!Right) 233*5ffd83dbSDimitry Andric return Right.takeError(); 234*5ffd83dbSDimitry Andric 235*5ffd83dbSDimitry Andric P = parseChar(')', Right->State); 236*5ffd83dbSDimitry Andric if (!P) 237*5ffd83dbSDimitry Andric return P.takeError(); 238*5ffd83dbSDimitry Andric 239*5ffd83dbSDimitry Andric return makeParseProgress(P->State, 240*5ffd83dbSDimitry Andric Op(std::move(Left->Value), std::move(Right->Value))); 241*5ffd83dbSDimitry Andric } 242*5ffd83dbSDimitry Andric 243*5ffd83dbSDimitry Andric // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or 244*5ffd83dbSDimitry Andric // Id operator). Returns StencilType representing the operator on success and 245*5ffd83dbSDimitry Andric // error if it fails to parse input for an operator. 246*5ffd83dbSDimitry Andric static ExpectedProgress<RangeSelector> 247*5ffd83dbSDimitry Andric parseRangeSelectorImpl(ParseState State) { 248*5ffd83dbSDimitry Andric auto Id = parseId(State); 249*5ffd83dbSDimitry Andric if (!Id) 250*5ffd83dbSDimitry Andric return Id.takeError(); 251*5ffd83dbSDimitry Andric 252*5ffd83dbSDimitry Andric std::string OpName = std::move(Id->Value); 253*5ffd83dbSDimitry Andric if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) 254*5ffd83dbSDimitry Andric return parseSingle(parseStringId, *Op, Id->State); 255*5ffd83dbSDimitry Andric 256*5ffd83dbSDimitry Andric if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) 257*5ffd83dbSDimitry Andric return parseSingle(parseRangeSelectorImpl, *Op, Id->State); 258*5ffd83dbSDimitry Andric 259*5ffd83dbSDimitry Andric if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) 260*5ffd83dbSDimitry Andric return parsePair(parseStringId, *Op, Id->State); 261*5ffd83dbSDimitry Andric 262*5ffd83dbSDimitry Andric if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) 263*5ffd83dbSDimitry Andric return parsePair(parseRangeSelectorImpl, *Op, Id->State); 264*5ffd83dbSDimitry Andric 265*5ffd83dbSDimitry Andric return makeParseError(State, "unknown selector name: " + OpName); 266*5ffd83dbSDimitry Andric } 267*5ffd83dbSDimitry Andric 268*5ffd83dbSDimitry Andric Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { 269*5ffd83dbSDimitry Andric ParseState State = {Input, Input}; 270*5ffd83dbSDimitry Andric ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); 271*5ffd83dbSDimitry Andric if (!Result) 272*5ffd83dbSDimitry Andric return Result.takeError(); 273*5ffd83dbSDimitry Andric State = Result->State; 274*5ffd83dbSDimitry Andric // Discard any potentially trailing whitespace. 275*5ffd83dbSDimitry Andric State.Input = consumeWhitespace(State.Input); 276*5ffd83dbSDimitry Andric if (State.Input.empty()) 277*5ffd83dbSDimitry Andric return Result->Value; 278*5ffd83dbSDimitry Andric return makeParseError(State, "unexpected input after selector"); 279*5ffd83dbSDimitry Andric } 280