1 //===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "clang/Tooling/Transformer/Parsing.h" 10 #include "clang/AST/Expr.h" 11 #include "clang/ASTMatchers/ASTMatchFinder.h" 12 #include "clang/Basic/CharInfo.h" 13 #include "clang/Basic/SourceLocation.h" 14 #include "clang/Lex/Lexer.h" 15 #include "clang/Tooling/Transformer/RangeSelector.h" 16 #include "clang/Tooling/Transformer/SourceCode.h" 17 #include "llvm/ADT/StringMap.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/Support/Errc.h" 20 #include "llvm/Support/Error.h" 21 #include <string> 22 #include <utility> 23 #include <vector> 24 25 using namespace clang; 26 using namespace transformer; 27 28 // FIXME: This implementation is entirely separate from that of the AST 29 // matchers. Given the similarity of the languages and uses of the two parsers, 30 // the two should share a common parsing infrastructure, as should other 31 // Transformer types. We intend to unify this implementation soon to share as 32 // much as possible with the AST Matchers parsing. 33 34 namespace { 35 using llvm::Expected; 36 37 template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...); 38 39 struct ParseState { 40 // The remaining input to be processed. 41 StringRef Input; 42 // The original input. Not modified during parsing; only for reference in 43 // error reporting. 44 StringRef OriginalInput; 45 }; 46 47 // Represents an intermediate result returned by a parsing function. Functions 48 // that don't generate values should use `std::nullopt` 49 template <typename ResultType> struct ParseProgress { 50 ParseState State; 51 // Intermediate result generated by the Parser. 52 ResultType Value; 53 }; 54 55 template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>; 56 template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState); 57 58 class ParseError : public llvm::ErrorInfo<ParseError> { 59 public: 60 // Required field for all ErrorInfo derivatives. 61 static char ID; 62 63 ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt) 64 : Pos(Pos), ErrorMsg(std::move(ErrorMsg)), 65 Excerpt(std::move(InputExcerpt)) {} 66 67 void log(llvm::raw_ostream &OS) const override { 68 OS << "parse error at position (" << Pos << "): " << ErrorMsg 69 << ": " + Excerpt; 70 } 71 72 std::error_code convertToErrorCode() const override { 73 return llvm::inconvertibleErrorCode(); 74 } 75 76 // Position of the error in the input string. 77 size_t Pos; 78 std::string ErrorMsg; 79 // Excerpt of the input starting at the error position. 80 std::string Excerpt; 81 }; 82 83 char ParseError::ID; 84 } // namespace 85 86 static const llvm::StringMap<RangeSelectorOp<std::string>> & 87 getUnaryStringSelectors() { 88 static const llvm::StringMap<RangeSelectorOp<std::string>> M = { 89 {"name", name}, 90 {"node", node}, 91 {"statement", statement}, 92 {"statements", statements}, 93 {"member", member}, 94 {"callArgs", callArgs}, 95 {"elseBranch", elseBranch}, 96 {"initListElements", initListElements}}; 97 return M; 98 } 99 100 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> & 101 getUnaryRangeSelectors() { 102 static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = { 103 {"before", before}, {"after", after}, {"expansion", expansion}}; 104 return M; 105 } 106 107 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> & 108 getBinaryStringSelectors() { 109 static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = { 110 {"encloseNodes", encloseNodes}}; 111 return M; 112 } 113 114 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> & 115 getBinaryRangeSelectors() { 116 static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> 117 M = {{"enclose", enclose}, {"between", between}}; 118 return M; 119 } 120 121 template <typename Element> 122 llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map, 123 llvm::StringRef Key) { 124 auto it = Map.find(Key); 125 if (it == Map.end()) 126 return std::nullopt; 127 return it->second; 128 } 129 130 template <typename ResultType> 131 ParseProgress<ResultType> makeParseProgress(ParseState State, 132 ResultType Result) { 133 return ParseProgress<ResultType>{State, std::move(Result)}; 134 } 135 136 static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) { 137 size_t Pos = S.OriginalInput.size() - S.Input.size(); 138 return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg), 139 S.OriginalInput.substr(Pos, 20).str()); 140 } 141 142 // Returns a new ParseState that advances \c S by \c N characters. 143 static ParseState advance(ParseState S, size_t N) { 144 S.Input = S.Input.drop_front(N); 145 return S; 146 } 147 148 static StringRef consumeWhitespace(StringRef S) { 149 return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); }); 150 } 151 152 // Parses a single expected character \c c from \c State, skipping preceding 153 // whitespace. Error if the expected character isn't found. 154 static ExpectedProgress<std::nullopt_t> parseChar(char c, ParseState State) { 155 State.Input = consumeWhitespace(State.Input); 156 if (State.Input.empty() || State.Input.front() != c) 157 return makeParseError(State, 158 ("expected char not found: " + llvm::Twine(c)).str()); 159 return makeParseProgress(advance(State, 1), std::nullopt); 160 } 161 162 // Parses an identitifer "token" -- handles preceding whitespace. 163 static ExpectedProgress<std::string> parseId(ParseState State) { 164 State.Input = consumeWhitespace(State.Input); 165 auto Id = State.Input.take_while( 166 [](char c) { return isASCII(c) && isAsciiIdentifierContinue(c); }); 167 if (Id.empty()) 168 return makeParseError(State, "failed to parse name"); 169 return makeParseProgress(advance(State, Id.size()), Id.str()); 170 } 171 172 // For consistency with the AST matcher parser and C++ code, node ids are 173 // written as strings. However, we do not support escaping in the string. 174 static ExpectedProgress<std::string> parseStringId(ParseState State) { 175 State.Input = consumeWhitespace(State.Input); 176 if (State.Input.empty()) 177 return makeParseError(State, "unexpected end of input"); 178 if (!State.Input.consume_front("\"")) 179 return makeParseError( 180 State, 181 "expecting string, but encountered other character or end of input"); 182 183 StringRef Id = State.Input.take_until([](char c) { return c == '"'; }); 184 if (State.Input.size() == Id.size()) 185 return makeParseError(State, "unterminated string"); 186 // Advance past the trailing quote as well. 187 return makeParseProgress(advance(State, Id.size() + 1), Id.str()); 188 } 189 190 // Parses a single element surrounded by parens. `Op` is applied to the parsed 191 // result to create the result of this function call. 192 template <typename T> 193 ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement, 194 RangeSelectorOp<T> Op, 195 ParseState State) { 196 auto P = parseChar('(', State); 197 if (!P) 198 return P.takeError(); 199 200 auto E = ParseElement(P->State); 201 if (!E) 202 return E.takeError(); 203 204 P = parseChar(')', E->State); 205 if (!P) 206 return P.takeError(); 207 208 return makeParseProgress(P->State, Op(std::move(E->Value))); 209 } 210 211 // Parses a pair of elements surrounded by parens and separated by comma. `Op` 212 // is applied to the parsed results to create the result of this function call. 213 template <typename T> 214 ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement, 215 RangeSelectorOp<T, T> Op, 216 ParseState State) { 217 auto P = parseChar('(', State); 218 if (!P) 219 return P.takeError(); 220 221 auto Left = ParseElement(P->State); 222 if (!Left) 223 return Left.takeError(); 224 225 P = parseChar(',', Left->State); 226 if (!P) 227 return P.takeError(); 228 229 auto Right = ParseElement(P->State); 230 if (!Right) 231 return Right.takeError(); 232 233 P = parseChar(')', Right->State); 234 if (!P) 235 return P.takeError(); 236 237 return makeParseProgress(P->State, 238 Op(std::move(Left->Value), std::move(Right->Value))); 239 } 240 241 // Parses input for a stencil operator(single arg ops like AsValue, MemberOp or 242 // Id operator). Returns StencilType representing the operator on success and 243 // error if it fails to parse input for an operator. 244 static ExpectedProgress<RangeSelector> 245 parseRangeSelectorImpl(ParseState State) { 246 auto Id = parseId(State); 247 if (!Id) 248 return Id.takeError(); 249 250 std::string OpName = std::move(Id->Value); 251 if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) 252 return parseSingle(parseStringId, *Op, Id->State); 253 254 if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) 255 return parseSingle(parseRangeSelectorImpl, *Op, Id->State); 256 257 if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) 258 return parsePair(parseStringId, *Op, Id->State); 259 260 if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) 261 return parsePair(parseRangeSelectorImpl, *Op, Id->State); 262 263 return makeParseError(State, "unknown selector name: " + OpName); 264 } 265 266 Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) { 267 ParseState State = {Input, Input}; 268 ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State); 269 if (!Result) 270 return Result.takeError(); 271 State = Result->State; 272 // Discard any potentially trailing whitespace. 273 State.Input = consumeWhitespace(State.Input); 274 if (State.Input.empty()) 275 return Result->Value; 276 return makeParseError(State, "unexpected input after selector"); 277 } 278