//===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// These backends consume the definitions of Syntax Tree nodes.
// See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
//
// The -gen-clang-syntax-node-list backend produces a .inc with macro calls
//   CONCRETE_NODE(Kind, Base)
//   ABSTRACT_NODE(Kind, Base, FirstKind, LastKind)
// (both default to NODE(Kind, Base) when not defined by the includer),
// similar to those for AST nodes such as AST/DeclNodes.inc.
//
// The -gen-clang-syntax-node-classes backend produces definitions for the
// syntax::Node subclasses (except those marked as External).
//
// In future, another backend will encode the structure of the various node
// types in tables so their invariants can be checked and enforced.
24*e8d8bef9SDimitry Andric // 25*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 26*e8d8bef9SDimitry Andric #include "TableGenBackends.h" 27*e8d8bef9SDimitry Andric 28*e8d8bef9SDimitry Andric #include <deque> 29*e8d8bef9SDimitry Andric 30*e8d8bef9SDimitry Andric #include "llvm/ADT/StringExtras.h" 31*e8d8bef9SDimitry Andric #include "llvm/Support/FormatVariadic.h" 32*e8d8bef9SDimitry Andric #include "llvm/Support/raw_ostream.h" 33*e8d8bef9SDimitry Andric #include "llvm/TableGen/Record.h" 34*e8d8bef9SDimitry Andric #include "llvm/TableGen/TableGenBackend.h" 35*e8d8bef9SDimitry Andric 36*e8d8bef9SDimitry Andric namespace { 37*e8d8bef9SDimitry Andric using llvm::formatv; 38*e8d8bef9SDimitry Andric 39*e8d8bef9SDimitry Andric // The class hierarchy of Node types. 40*e8d8bef9SDimitry Andric // We assemble this in order to be able to define the NodeKind enum in a 41*e8d8bef9SDimitry Andric // stable and useful way, where abstract Node subclasses correspond to ranges. 42*e8d8bef9SDimitry Andric class Hierarchy { 43*e8d8bef9SDimitry Andric public: 44*e8d8bef9SDimitry Andric Hierarchy(const llvm::RecordKeeper &Records) { 45*e8d8bef9SDimitry Andric for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType")) 46*e8d8bef9SDimitry Andric add(T); 47*e8d8bef9SDimitry Andric for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType")) 48*e8d8bef9SDimitry Andric if (llvm::Record *Base = Derived->getValueAsOptionalDef("base")) 49*e8d8bef9SDimitry Andric link(Derived, Base); 50*e8d8bef9SDimitry Andric for (NodeType &N : AllTypes) { 51*e8d8bef9SDimitry Andric llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) { 52*e8d8bef9SDimitry Andric return L->Record->getName() < R->Record->getName(); 53*e8d8bef9SDimitry Andric }); 54*e8d8bef9SDimitry Andric // Alternatives nodes must have subclasses, External nodes may do. 
55*e8d8bef9SDimitry Andric assert(N.Record->isSubClassOf("Alternatives") || 56*e8d8bef9SDimitry Andric N.Record->isSubClassOf("External") || N.Derived.empty()); 57*e8d8bef9SDimitry Andric assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty()); 58*e8d8bef9SDimitry Andric } 59*e8d8bef9SDimitry Andric } 60*e8d8bef9SDimitry Andric 61*e8d8bef9SDimitry Andric struct NodeType { 62*e8d8bef9SDimitry Andric const llvm::Record *Record = nullptr; 63*e8d8bef9SDimitry Andric const NodeType *Base = nullptr; 64*e8d8bef9SDimitry Andric std::vector<const NodeType *> Derived; 65*e8d8bef9SDimitry Andric llvm::StringRef name() const { return Record->getName(); } 66*e8d8bef9SDimitry Andric }; 67*e8d8bef9SDimitry Andric 68*e8d8bef9SDimitry Andric NodeType &get(llvm::StringRef Name = "Node") { 69*e8d8bef9SDimitry Andric auto NI = ByName.find(Name); 70*e8d8bef9SDimitry Andric assert(NI != ByName.end() && "no such node"); 71*e8d8bef9SDimitry Andric return *NI->second; 72*e8d8bef9SDimitry Andric } 73*e8d8bef9SDimitry Andric 74*e8d8bef9SDimitry Andric // Traverse the hierarchy in pre-order (base classes before derived). 
75*e8d8bef9SDimitry Andric void visit(llvm::function_ref<void(const NodeType &)> CB, 76*e8d8bef9SDimitry Andric const NodeType *Start = nullptr) { 77*e8d8bef9SDimitry Andric if (Start == nullptr) 78*e8d8bef9SDimitry Andric Start = &get(); 79*e8d8bef9SDimitry Andric CB(*Start); 80*e8d8bef9SDimitry Andric for (const NodeType *D : Start->Derived) 81*e8d8bef9SDimitry Andric visit(CB, D); 82*e8d8bef9SDimitry Andric } 83*e8d8bef9SDimitry Andric 84*e8d8bef9SDimitry Andric private: 85*e8d8bef9SDimitry Andric void add(const llvm::Record *R) { 86*e8d8bef9SDimitry Andric AllTypes.emplace_back(); 87*e8d8bef9SDimitry Andric AllTypes.back().Record = R; 88*e8d8bef9SDimitry Andric bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second; 89*e8d8bef9SDimitry Andric assert(Inserted && "Duplicate node name"); 90*e8d8bef9SDimitry Andric (void)Inserted; 91*e8d8bef9SDimitry Andric } 92*e8d8bef9SDimitry Andric 93*e8d8bef9SDimitry Andric void link(const llvm::Record *Derived, const llvm::Record *Base) { 94*e8d8bef9SDimitry Andric auto &CN = get(Derived->getName()), &PN = get(Base->getName()); 95*e8d8bef9SDimitry Andric assert(CN.Base == nullptr && "setting base twice"); 96*e8d8bef9SDimitry Andric PN.Derived.push_back(&CN); 97*e8d8bef9SDimitry Andric CN.Base = &PN; 98*e8d8bef9SDimitry Andric } 99*e8d8bef9SDimitry Andric 100*e8d8bef9SDimitry Andric std::deque<NodeType> AllTypes; 101*e8d8bef9SDimitry Andric llvm::DenseMap<llvm::StringRef, NodeType *> ByName; 102*e8d8bef9SDimitry Andric }; 103*e8d8bef9SDimitry Andric 104*e8d8bef9SDimitry Andric const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) { 105*e8d8bef9SDimitry Andric return N.Derived.empty() ? N : firstConcrete(*N.Derived.front()); 106*e8d8bef9SDimitry Andric } 107*e8d8bef9SDimitry Andric const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) { 108*e8d8bef9SDimitry Andric return N.Derived.empty() ? 
N : lastConcrete(*N.Derived.back()); 109*e8d8bef9SDimitry Andric } 110*e8d8bef9SDimitry Andric 111*e8d8bef9SDimitry Andric struct SyntaxConstraint { 112*e8d8bef9SDimitry Andric SyntaxConstraint(const llvm::Record &R) { 113*e8d8bef9SDimitry Andric if (R.isSubClassOf("Optional")) { 114*e8d8bef9SDimitry Andric *this = SyntaxConstraint(*R.getValueAsDef("inner")); 115*e8d8bef9SDimitry Andric } else if (R.isSubClassOf("AnyToken")) { 116*e8d8bef9SDimitry Andric NodeType = "Leaf"; 117*e8d8bef9SDimitry Andric } else if (R.isSubClassOf("NodeType")) { 118*e8d8bef9SDimitry Andric NodeType = R.getName().str(); 119*e8d8bef9SDimitry Andric } else { 120*e8d8bef9SDimitry Andric assert(false && "Unhandled Syntax kind"); 121*e8d8bef9SDimitry Andric } 122*e8d8bef9SDimitry Andric } 123*e8d8bef9SDimitry Andric 124*e8d8bef9SDimitry Andric std::string NodeType; 125*e8d8bef9SDimitry Andric // optional and leaf types also go here, once we want to use them. 126*e8d8bef9SDimitry Andric }; 127*e8d8bef9SDimitry Andric 128*e8d8bef9SDimitry Andric } // namespace 129*e8d8bef9SDimitry Andric 130*e8d8bef9SDimitry Andric void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records, 131*e8d8bef9SDimitry Andric llvm::raw_ostream &OS) { 132*e8d8bef9SDimitry Andric llvm::emitSourceFileHeader("Syntax tree node list", OS); 133*e8d8bef9SDimitry Andric Hierarchy H(Records); 134*e8d8bef9SDimitry Andric OS << R"cpp( 135*e8d8bef9SDimitry Andric #ifndef NODE 136*e8d8bef9SDimitry Andric #define NODE(Kind, Base) 137*e8d8bef9SDimitry Andric #endif 138*e8d8bef9SDimitry Andric 139*e8d8bef9SDimitry Andric #ifndef CONCRETE_NODE 140*e8d8bef9SDimitry Andric #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base) 141*e8d8bef9SDimitry Andric #endif 142*e8d8bef9SDimitry Andric 143*e8d8bef9SDimitry Andric #ifndef ABSTRACT_NODE 144*e8d8bef9SDimitry Andric #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base) 145*e8d8bef9SDimitry Andric #endif 146*e8d8bef9SDimitry Andric 147*e8d8bef9SDimitry Andric )cpp"; 
148*e8d8bef9SDimitry Andric H.visit([&](const Hierarchy::NodeType &N) { 149*e8d8bef9SDimitry Andric // Don't emit ABSTRACT_NODE for node itself, which has no parent. 150*e8d8bef9SDimitry Andric if (N.Base == nullptr) 151*e8d8bef9SDimitry Andric return; 152*e8d8bef9SDimitry Andric if (N.Derived.empty()) 153*e8d8bef9SDimitry Andric OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name()); 154*e8d8bef9SDimitry Andric else 155*e8d8bef9SDimitry Andric OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(), 156*e8d8bef9SDimitry Andric N.Base->name(), firstConcrete(N).name(), 157*e8d8bef9SDimitry Andric lastConcrete(N).name()); 158*e8d8bef9SDimitry Andric }); 159*e8d8bef9SDimitry Andric OS << R"cpp( 160*e8d8bef9SDimitry Andric #undef NODE 161*e8d8bef9SDimitry Andric #undef CONCRETE_NODE 162*e8d8bef9SDimitry Andric #undef ABSTRACT_NODE 163*e8d8bef9SDimitry Andric )cpp"; 164*e8d8bef9SDimitry Andric } 165*e8d8bef9SDimitry Andric 166*e8d8bef9SDimitry Andric // Format a documentation string as a C++ comment. 167*e8d8bef9SDimitry Andric // Trims leading whitespace handling since comments come from a TableGen file: 168*e8d8bef9SDimitry Andric // documentation = [{ 169*e8d8bef9SDimitry Andric // This is a widget. Example: 170*e8d8bef9SDimitry Andric // widget.explode() 171*e8d8bef9SDimitry Andric // }]; 172*e8d8bef9SDimitry Andric // and should be formatted as: 173*e8d8bef9SDimitry Andric // /// This is a widget. Example: 174*e8d8bef9SDimitry Andric // /// widget.explode() 175*e8d8bef9SDimitry Andric // Leading and trailing whitespace lines are stripped. 176*e8d8bef9SDimitry Andric // The indentation of the first line is stripped from all lines. 
177*e8d8bef9SDimitry Andric static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) { 178*e8d8bef9SDimitry Andric Doc = Doc.rtrim(); 179*e8d8bef9SDimitry Andric llvm::StringRef Line; 180*e8d8bef9SDimitry Andric while (Line.trim().empty() && !Doc.empty()) 181*e8d8bef9SDimitry Andric std::tie(Line, Doc) = Doc.split('\n'); 182*e8d8bef9SDimitry Andric llvm::StringRef Indent = Line.take_while(llvm::isSpace); 183*e8d8bef9SDimitry Andric for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) { 184*e8d8bef9SDimitry Andric Line.consume_front(Indent); 185*e8d8bef9SDimitry Andric OS << "/// " << Line << "\n"; 186*e8d8bef9SDimitry Andric } 187*e8d8bef9SDimitry Andric } 188*e8d8bef9SDimitry Andric 189*e8d8bef9SDimitry Andric void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records, 190*e8d8bef9SDimitry Andric llvm::raw_ostream &OS) { 191*e8d8bef9SDimitry Andric llvm::emitSourceFileHeader("Syntax tree node list", OS); 192*e8d8bef9SDimitry Andric Hierarchy H(Records); 193*e8d8bef9SDimitry Andric 194*e8d8bef9SDimitry Andric OS << "\n// Forward-declare node types so we don't have to carefully " 195*e8d8bef9SDimitry Andric "sequence definitions.\n"; 196*e8d8bef9SDimitry Andric H.visit([&](const Hierarchy::NodeType &N) { 197*e8d8bef9SDimitry Andric OS << "class " << N.name() << ";\n"; 198*e8d8bef9SDimitry Andric }); 199*e8d8bef9SDimitry Andric 200*e8d8bef9SDimitry Andric OS << "\n// Node definitions\n\n"; 201*e8d8bef9SDimitry Andric H.visit([&](const Hierarchy::NodeType &N) { 202*e8d8bef9SDimitry Andric if (N.Record->isSubClassOf("External")) 203*e8d8bef9SDimitry Andric return; 204*e8d8bef9SDimitry Andric printDoc(N.Record->getValueAsString("documentation"), OS); 205*e8d8bef9SDimitry Andric OS << formatv("class {0}{1} : public {2} {{\n", N.name(), 206*e8d8bef9SDimitry Andric N.Derived.empty() ? " final" : "", N.Base->name()); 207*e8d8bef9SDimitry Andric 208*e8d8bef9SDimitry Andric // Constructor. 
209*e8d8bef9SDimitry Andric if (N.Derived.empty()) 210*e8d8bef9SDimitry Andric OS << formatv("public:\n {0}() : {1}(NodeKind::{0}) {{}\n", N.name(), 211*e8d8bef9SDimitry Andric N.Base->name()); 212*e8d8bef9SDimitry Andric else 213*e8d8bef9SDimitry Andric OS << formatv("protected:\n {0}(NodeKind K) : {1}(K) {{}\npublic:\n", 214*e8d8bef9SDimitry Andric N.name(), N.Base->name()); 215*e8d8bef9SDimitry Andric 216*e8d8bef9SDimitry Andric if (N.Record->isSubClassOf("Sequence")) { 217*e8d8bef9SDimitry Andric // Getters for sequence elements. 218*e8d8bef9SDimitry Andric for (const auto &C : N.Record->getValueAsListOfDefs("children")) { 219*e8d8bef9SDimitry Andric assert(C->isSubClassOf("Role")); 220*e8d8bef9SDimitry Andric llvm::StringRef Role = C->getValueAsString("role"); 221*e8d8bef9SDimitry Andric SyntaxConstraint Constraint(*C->getValueAsDef("syntax")); 222*e8d8bef9SDimitry Andric for (const char *Const : {"", "const "}) 223*e8d8bef9SDimitry Andric OS << formatv( 224*e8d8bef9SDimitry Andric " {2}{1} *get{0}() {2} {{\n" 225*e8d8bef9SDimitry Andric " return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n" 226*e8d8bef9SDimitry Andric " }\n", 227*e8d8bef9SDimitry Andric Role, Constraint.NodeType, Const); 228*e8d8bef9SDimitry Andric } 229*e8d8bef9SDimitry Andric } 230*e8d8bef9SDimitry Andric 231*e8d8bef9SDimitry Andric // classof. FIXME: move definition inline once ~all nodes are generated. 232*e8d8bef9SDimitry Andric OS << " static bool classof(const Node *N);\n"; 233*e8d8bef9SDimitry Andric 234*e8d8bef9SDimitry Andric OS << "};\n\n"; 235*e8d8bef9SDimitry Andric }); 236*e8d8bef9SDimitry Andric } 237