1 //===- FormatGen.h - Utilities for custom assembly formats ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains common classes for building custom assembly format parsers 10 // and generators. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_ 15 #define MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_ 16 17 #include "mlir/Support/LLVM.h" 18 #include "llvm/ADT/StringRef.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/Support/Allocator.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Support/SMLoc.h" 23 #include <vector> 24 25 namespace llvm { 26 class SourceMgr; 27 } // namespace llvm 28 29 namespace mlir { 30 namespace tblgen { 31 32 //===----------------------------------------------------------------------===// 33 // FormatToken 34 //===----------------------------------------------------------------------===// 35 36 /// This class represents a specific token in the input format. 37 class FormatToken { 38 public: 39 /// Basic token kinds. 40 enum Kind { 41 // Markers. 42 eof, 43 error, 44 45 // Tokens with no info. 46 l_paren, 47 r_paren, 48 caret, 49 colon, 50 comma, 51 equal, 52 less, 53 greater, 54 question, 55 star, 56 pipe, 57 58 // Keywords. 59 keyword_start, 60 kw_attr_dict, 61 kw_attr_dict_w_keyword, 62 kw_prop_dict, 63 kw_custom, 64 kw_functional_type, 65 kw_oilist, 66 kw_operands, 67 kw_params, 68 kw_qualified, 69 kw_ref, 70 kw_regions, 71 kw_results, 72 kw_struct, 73 kw_successors, 74 kw_type, 75 keyword_end, 76 77 // String valued tokens. 78 identifier, 79 literal, 80 variable, 81 string, 82 }; 83 FormatToken(Kind kind,StringRef spelling)84 FormatToken(Kind kind, StringRef spelling) : kind(kind), spelling(spelling) {} 85 86 /// Return the bytes that make up this token. getSpelling()87 StringRef getSpelling() const { return spelling; } 88 89 /// Return the kind of this token. getKind()90 Kind getKind() const { return kind; } 91 92 /// Return a location for this token. 93 SMLoc getLoc() const; 94 95 /// Returns true if the token is of the given kind. is(Kind kind)96 bool is(Kind kind) { return getKind() == kind; } 97 98 /// Return if this token is a keyword. isKeyword()99 bool isKeyword() const { 100 return getKind() > Kind::keyword_start && getKind() < Kind::keyword_end; 101 } 102 103 private: 104 /// Discriminator that indicates the kind of token this is. 105 Kind kind; 106 107 /// A reference to the entire token contents; this is always a pointer into 108 /// a memory buffer owned by the source manager. 109 StringRef spelling; 110 }; 111 112 //===----------------------------------------------------------------------===// 113 // FormatLexer 114 //===----------------------------------------------------------------------===// 115 116 /// This class implements a simple lexer for operation assembly format strings. 117 class FormatLexer { 118 public: 119 FormatLexer(llvm::SourceMgr &mgr, SMLoc loc); 120 121 /// Lex the next token and return it. 122 FormatToken lexToken(); 123 124 /// Emit an error to the lexer with the given location and message. 125 FormatToken emitError(SMLoc loc, const Twine &msg); 126 FormatToken emitError(const char *loc, const Twine &msg); 127 128 FormatToken emitErrorAndNote(SMLoc loc, const Twine &msg, const Twine ¬e); 129 130 private: 131 /// Return the next character in the stream. 132 int getNextChar(); 133 134 /// Lex an identifier, literal, variable, or string. 135 FormatToken lexIdentifier(const char *tokStart); 136 FormatToken lexLiteral(const char *tokStart); 137 FormatToken lexVariable(const char *tokStart); 138 FormatToken lexString(const char *tokStart); 139 140 /// Create a token with the current pointer and a start pointer. formToken(FormatToken::Kind kind,const char * tokStart)141 FormatToken formToken(FormatToken::Kind kind, const char *tokStart) { 142 return FormatToken(kind, StringRef(tokStart, curPtr - tokStart)); 143 } 144 145 /// The source manager containing the format string. 146 llvm::SourceMgr &mgr; 147 /// Location of the format string. 148 SMLoc loc; 149 /// Buffer containing the format string. 150 StringRef curBuffer; 151 /// Current pointer in the buffer. 152 const char *curPtr; 153 }; 154 155 //===----------------------------------------------------------------------===// 156 // FormatElement 157 //===----------------------------------------------------------------------===// 158 159 /// This class represents a single format element. 160 /// 161 /// If you squint and take a close look, you can see the outline of a `Format` 162 /// dialect. 163 class FormatElement { 164 public: 165 virtual ~FormatElement(); 166 167 // The top-level kinds of format elements. 168 enum Kind { Literal, String, Variable, Whitespace, Directive, Optional }; 169 170 /// Support LLVM-style RTTI. classof(const FormatElement * el)171 static bool classof(const FormatElement *el) { return true; } 172 173 /// Get the element kind. getKind()174 Kind getKind() const { return kind; } 175 176 protected: 177 /// Create a format element with the given kind. FormatElement(Kind kind)178 FormatElement(Kind kind) : kind(kind) {} 179 180 private: 181 /// The kind of the element. 182 Kind kind; 183 }; 184 185 /// The base class for all format elements. This class implements common methods 186 /// for LLVM-style RTTI. 187 template <FormatElement::Kind ElementKind> 188 class FormatElementBase : public FormatElement { 189 public: 190 /// Support LLVM-style RTTI. classof(const FormatElement * el)191 static bool classof(const FormatElement *el) { 192 return ElementKind == el->getKind(); 193 } 194 195 protected: 196 /// Create a format element with the given kind. FormatElementBase()197 FormatElementBase() : FormatElement(ElementKind) {} 198 }; 199 200 /// This class represents a literal element. A literal is either one of the 201 /// supported punctuation characters (e.g. `(` or `,`) or a string literal (e.g. 202 /// `literal`). 203 class LiteralElement : public FormatElementBase<FormatElement::Literal> { 204 public: 205 /// Create a literal element with the given spelling. LiteralElement(StringRef spelling)206 explicit LiteralElement(StringRef spelling) : spelling(spelling) {} 207 208 /// Get the spelling of the literal. getSpelling()209 StringRef getSpelling() const { return spelling; } 210 211 private: 212 /// The spelling of the variable, i.e. the string contained within the 213 /// backticks. 214 StringRef spelling; 215 }; 216 217 /// This class represents a raw string that can contain arbitrary C++ code. 218 class StringElement : public FormatElementBase<FormatElement::String> { 219 public: 220 /// Create a string element with the given contents. StringElement(std::string value)221 explicit StringElement(std::string value) : value(std::move(value)) {} 222 223 /// Get the value of the string element. getValue()224 StringRef getValue() const { return value; } 225 226 private: 227 /// The contents of the string. 228 std::string value; 229 }; 230 231 /// This class represents a variable element. A variable refers to some part of 232 /// the object being parsed, e.g. an attribute or operand on an operation or a 233 /// parameter on an attribute. 234 class VariableElement : public FormatElementBase<FormatElement::Variable> { 235 public: 236 /// These are the kinds of variables. 237 enum Kind { 238 Attribute, 239 Operand, 240 Region, 241 Result, 242 Successor, 243 Parameter, 244 Property 245 }; 246 247 /// Get the kind of variable. getKind()248 Kind getKind() const { return kind; } 249 250 protected: 251 /// Create a variable with a kind. VariableElement(Kind kind)252 VariableElement(Kind kind) : kind(kind) {} 253 254 private: 255 /// The kind of variable. 256 Kind kind; 257 }; 258 259 /// Base class for variable elements. This class implements common methods for 260 /// LLVM-style RTTI. 261 template <VariableElement::Kind VariableKind> 262 class VariableElementBase : public VariableElement { 263 public: 264 /// An element is of this class if it is a variable and has the same variable 265 /// type. classof(const FormatElement * el)266 static bool classof(const FormatElement *el) { 267 if (auto *varEl = dyn_cast<VariableElement>(el)) 268 return VariableKind == varEl->getKind(); 269 return false; 270 } 271 272 protected: 273 /// Create a variable element with the given variable kind. VariableElementBase()274 VariableElementBase() : VariableElement(VariableKind) {} 275 }; 276 277 /// This class represents a whitespace element, e.g. a newline or space. It is a 278 /// literal that is printed but never parsed. When the value is empty, i.e. ``, 279 /// a space is elided where one would have been printed automatically. 280 class WhitespaceElement : public FormatElementBase<FormatElement::Whitespace> { 281 public: 282 /// Create a whitespace element. WhitespaceElement(StringRef value)283 explicit WhitespaceElement(StringRef value) : value(value) {} 284 285 /// Get the whitespace value. getValue()286 StringRef getValue() const { return value; } 287 288 private: 289 /// The value of the whitespace element. Can be empty. 290 StringRef value; 291 }; 292 293 class DirectiveElement : public FormatElementBase<FormatElement::Directive> { 294 public: 295 /// These are the kinds of directives. 296 enum Kind { 297 AttrDict, 298 PropDict, 299 Custom, 300 FunctionalType, 301 OIList, 302 Operands, 303 Ref, 304 Regions, 305 Results, 306 Successors, 307 Type, 308 Params, 309 Struct 310 }; 311 312 /// Get the directive kind. getKind()313 Kind getKind() const { return kind; } 314 315 protected: 316 /// Create a directive element with a kind. DirectiveElement(Kind kind)317 DirectiveElement(Kind kind) : kind(kind) {} 318 319 private: 320 /// The directive kind. 321 Kind kind; 322 }; 323 324 /// Base class for directive elements. This class implements common methods for 325 /// LLVM-style RTTI. 326 template <DirectiveElement::Kind DirectiveKind> 327 class DirectiveElementBase : public DirectiveElement { 328 public: 329 /// Create a directive element with the specified kind. DirectiveElementBase()330 DirectiveElementBase() : DirectiveElement(DirectiveKind) {} 331 332 /// A format element is of this class if it is a directive element and has the 333 /// same kind. classof(const FormatElement * el)334 static bool classof(const FormatElement *el) { 335 if (auto *directiveEl = dyn_cast<DirectiveElement>(el)) 336 return DirectiveKind == directiveEl->getKind(); 337 return false; 338 } 339 }; 340 341 /// This class represents a custom format directive that is implemented by the 342 /// user in C++. The directive accepts a list of arguments that is passed to the 343 /// C++ function. 344 class CustomDirective : public DirectiveElementBase<DirectiveElement::Custom> { 345 public: 346 /// Create a custom directive with a name and list of arguments. CustomDirective(StringRef name,std::vector<FormatElement * > && arguments)347 CustomDirective(StringRef name, std::vector<FormatElement *> &&arguments) 348 : name(name), arguments(std::move(arguments)) {} 349 350 /// Get the custom directive name. getName()351 StringRef getName() const { return name; } 352 353 /// Get the arguments to the custom directive. getArguments()354 ArrayRef<FormatElement *> getArguments() const { return arguments; } 355 356 private: 357 /// The name of the custom directive. The name is used to call two C++ 358 /// methods: `parse{name}` and `print{name}` with the given arguments. 359 StringRef name; 360 /// The arguments with which to call the custom functions. These are either 361 /// variables (for which the functions are responsible for populating) or 362 /// references to variables. 363 std::vector<FormatElement *> arguments; 364 }; 365 366 /// This class represents a reference directive. This directive can be used to 367 /// reference but not bind a previously bound variable or format object. Its 368 /// current only use is to pass variables as arguments to the custom directive. 369 class RefDirective : public DirectiveElementBase<DirectiveElement::Ref> { 370 public: 371 /// Create a reference directive with the single referenced child. RefDirective(FormatElement * arg)372 RefDirective(FormatElement *arg) : arg(arg) {} 373 374 /// Get the reference argument. getArg()375 FormatElement *getArg() const { return arg; } 376 377 private: 378 /// The referenced argument. 379 FormatElement *arg; 380 }; 381 382 /// This class represents a group of elements that are optionally emitted based 383 /// on an optional variable "anchor" and a group of elements that are emitted 384 /// when the anchor element is not present. 385 class OptionalElement : public FormatElementBase<FormatElement::Optional> { 386 public: 387 /// Create an optional group with the given child elements. OptionalElement(std::vector<FormatElement * > && thenElements,std::vector<FormatElement * > && elseElements,unsigned thenParseStart,unsigned elseParseStart,FormatElement * anchor,bool inverted)388 OptionalElement(std::vector<FormatElement *> &&thenElements, 389 std::vector<FormatElement *> &&elseElements, 390 unsigned thenParseStart, unsigned elseParseStart, 391 FormatElement *anchor, bool inverted) 392 : thenElements(std::move(thenElements)), 393 elseElements(std::move(elseElements)), thenParseStart(thenParseStart), 394 elseParseStart(elseParseStart), anchor(anchor), inverted(inverted) {} 395 396 /// Return the `then` elements of the optional group. Drops the first 397 /// `thenParseStart` whitespace elements if `parseable` is true. 398 ArrayRef<FormatElement *> getThenElements(bool parseable = false) const { 399 return llvm::ArrayRef(thenElements) 400 .drop_front(parseable ? thenParseStart : 0); 401 } 402 403 /// Return the `else` elements of the optional group. Drops the first 404 /// `elseParseStart` whitespace elements if `parseable` is true. 405 ArrayRef<FormatElement *> getElseElements(bool parseable = false) const { 406 return llvm::ArrayRef(elseElements) 407 .drop_front(parseable ? elseParseStart : 0); 408 } 409 410 /// Return the anchor of the optional group. getAnchor()411 FormatElement *getAnchor() const { return anchor; } 412 413 /// Return true if the optional group is inverted. isInverted()414 bool isInverted() const { return inverted; } 415 416 private: 417 /// The child elements emitted when the anchor is present. 418 std::vector<FormatElement *> thenElements; 419 /// The child elements emitted when the anchor is not present. 420 std::vector<FormatElement *> elseElements; 421 /// The index of the first element that is parsed in `thenElements`. That is, 422 /// the first non-whitespace element. 423 unsigned thenParseStart; 424 /// The index of the first element that is parsed in `elseElements`. That is, 425 /// the first non-whitespace element. 426 unsigned elseParseStart; 427 /// The anchor element of the optional group. 428 FormatElement *anchor; 429 /// Whether the optional group condition is inverted and the anchor element is 430 /// in the else group. 431 bool inverted; 432 }; 433 434 //===----------------------------------------------------------------------===// 435 // FormatParserBase 436 //===----------------------------------------------------------------------===// 437 438 /// Base class for a parser that implements an assembly format. This class 439 /// defines a common assembly format syntax and the creation of format elements. 440 /// Subclasses will need to implement parsing for the format elements they 441 /// support. 442 class FormatParser { 443 public: 444 /// Vtable anchor. 445 virtual ~FormatParser(); 446 447 /// Parse the assembly format. 448 FailureOr<std::vector<FormatElement *>> parse(); 449 450 protected: 451 /// The current context of the parser when parsing an element. 452 enum Context { 453 /// The element is being parsed in a "top-level" context, i.e. at the top of 454 /// the format or in an optional group. 455 TopLevelContext, 456 /// The element is being parsed as a custom directive child. 457 CustomDirectiveContext, 458 /// The element is being parsed as a type directive child. 459 TypeDirectiveContext, 460 /// The element is being parsed as a reference directive child. 461 RefDirectiveContext, 462 /// The element is being parsed as a struct directive child. 463 StructDirectiveContext 464 }; 465 466 /// Create a format parser with the given source manager and a location. FormatParser(llvm::SourceMgr & mgr,llvm::SMLoc loc)467 explicit FormatParser(llvm::SourceMgr &mgr, llvm::SMLoc loc) 468 : lexer(mgr, loc), curToken(lexer.lexToken()) {} 469 470 /// Allocate and construct a format element. 471 template <typename FormatElementT, typename... Args> create(Args &&...args)472 FormatElementT *create(Args &&...args) { 473 // FormatElementT *ptr = allocator.Allocate<FormatElementT>(); 474 // ::new (ptr) FormatElementT(std::forward<Args>(args)...); 475 // return ptr; 476 auto mem = std::make_unique<FormatElementT>(std::forward<Args>(args)...); 477 FormatElementT *ptr = mem.get(); 478 allocator.push_back(std::move(mem)); 479 return ptr; 480 } 481 482 //===--------------------------------------------------------------------===// 483 // Element Parsing 484 485 /// Parse a single element of any kind. 486 FailureOr<FormatElement *> parseElement(Context ctx); 487 /// Parse a literal. 488 FailureOr<FormatElement *> parseLiteral(Context ctx); 489 /// Parse a string. 490 FailureOr<FormatElement *> parseString(Context ctx); 491 /// Parse a variable. 492 FailureOr<FormatElement *> parseVariable(Context ctx); 493 /// Parse a directive. 494 FailureOr<FormatElement *> parseDirective(Context ctx); 495 /// Parse an optional group. 496 FailureOr<FormatElement *> parseOptionalGroup(Context ctx); 497 /// Parse a custom directive. 498 FailureOr<FormatElement *> parseCustomDirective(llvm::SMLoc loc, Context ctx); 499 /// Parse a ref directive. 500 FailureOr<FormatElement *> parseRefDirective(SMLoc loc, Context context); 501 /// Parse a qualified directive. 502 FailureOr<FormatElement *> parseQualifiedDirective(SMLoc loc, Context ctx); 503 504 /// Parse a format-specific variable kind. 505 virtual FailureOr<FormatElement *> 506 parseVariableImpl(llvm::SMLoc loc, StringRef name, Context ctx) = 0; 507 /// Parse a format-specific directive kind. 508 virtual FailureOr<FormatElement *> 509 parseDirectiveImpl(llvm::SMLoc loc, FormatToken::Kind kind, Context ctx) = 0; 510 511 //===--------------------------------------------------------------------===// 512 // Format Verification 513 514 /// Verify that the format is well-formed. 515 virtual LogicalResult verify(llvm::SMLoc loc, 516 ArrayRef<FormatElement *> elements) = 0; 517 /// Verify the arguments to a custom directive. 518 virtual LogicalResult 519 verifyCustomDirectiveArguments(llvm::SMLoc loc, 520 ArrayRef<FormatElement *> arguments) = 0; 521 /// Verify the elements of an optional group. 522 virtual LogicalResult 523 verifyOptionalGroupElements(llvm::SMLoc loc, 524 ArrayRef<FormatElement *> elements, 525 FormatElement *anchor) = 0; 526 527 /// Mark 'element' as qualified. If 'element' cannot be qualified an error 528 /// should be emitted and failure returned. 529 virtual LogicalResult markQualified(llvm::SMLoc loc, 530 FormatElement *element) = 0; 531 532 //===--------------------------------------------------------------------===// 533 // Lexer Utilities 534 535 /// Emit an error at the given location. emitError(llvm::SMLoc loc,const Twine & msg)536 LogicalResult emitError(llvm::SMLoc loc, const Twine &msg) { 537 lexer.emitError(loc, msg); 538 return failure(); 539 } 540 541 /// Emit an error and a note at the given notation. emitErrorAndNote(llvm::SMLoc loc,const Twine & msg,const Twine & note)542 LogicalResult emitErrorAndNote(llvm::SMLoc loc, const Twine &msg, 543 const Twine ¬e) { 544 lexer.emitErrorAndNote(loc, msg, note); 545 return failure(); 546 } 547 548 /// Parse a single token of the expected kind. parseToken(FormatToken::Kind kind,const Twine & msg)549 FailureOr<FormatToken> parseToken(FormatToken::Kind kind, const Twine &msg) { 550 if (!curToken.is(kind)) 551 return emitError(curToken.getLoc(), msg); 552 FormatToken tok = curToken; 553 consumeToken(); 554 return tok; 555 } 556 557 /// Advance the lexer to the next token. consumeToken()558 void consumeToken() { 559 assert(!curToken.is(FormatToken::eof) && !curToken.is(FormatToken::error) && 560 "shouldn't advance past EOF or errors"); 561 curToken = lexer.lexToken(); 562 } 563 564 /// Get the current token. peekToken()565 FormatToken peekToken() { return curToken; } 566 567 private: 568 /// The format parser retains ownership of the format elements in a bump 569 /// pointer allocator. 570 // FIXME: FormatElement with `std::vector` need to be converted to use 571 // trailing objects. 572 // llvm::BumpPtrAllocator allocator; 573 std::vector<std::unique_ptr<FormatElement>> allocator; 574 /// The format lexer to use. 575 FormatLexer lexer; 576 /// The current token in the lexer. 577 FormatToken curToken; 578 }; 579 580 //===----------------------------------------------------------------------===// 581 // Utility Functions 582 //===----------------------------------------------------------------------===// 583 584 /// Whether a space needs to be emitted before a literal. E.g., two keywords 585 /// back-to-back require a space separator, but a keyword followed by '<' does 586 /// not require a space. 587 bool shouldEmitSpaceBefore(StringRef value, bool lastWasPunctuation); 588 589 /// Returns true if the given string can be formatted as a keyword. 590 bool canFormatStringAsKeyword(StringRef value, 591 function_ref<void(Twine)> emitError = nullptr); 592 593 /// Returns true if the given string is valid format literal element. 594 /// If `emitError` is provided, it is invoked with the reason for the failure. 595 bool isValidLiteral(StringRef value, 596 function_ref<void(Twine)> emitError = nullptr); 597 598 /// Whether a failure in parsing the assembly format should be a fatal error. 599 extern llvm::cl::opt<bool> formatErrorIsFatal; 600 601 } // namespace tblgen 602 } // namespace mlir 603 604 #endif // MLIR_TOOLS_MLIRTBLGEN_FORMATGEN_H_ 605