xref: /freebsd-src/contrib/llvm-project/clang/utils/TableGen/ClangSyntaxEmitter.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
1*e8d8bef9SDimitry Andric //===- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ----------===//
2*e8d8bef9SDimitry Andric //
3*e8d8bef9SDimitry Andric //                     The LLVM Compiler Infrastructure
4*e8d8bef9SDimitry Andric //
5*e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6*e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
7*e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8*e8d8bef9SDimitry Andric //
9*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
10*e8d8bef9SDimitry Andric //
11*e8d8bef9SDimitry Andric // These backends consume the definitions of Syntax Tree nodes.
12*e8d8bef9SDimitry Andric // See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
13*e8d8bef9SDimitry Andric //
// The -gen-clang-syntax-node-list backend produces a .inc with macro calls
//   CONCRETE_NODE(Kind, Base)
//   ABSTRACT_NODE(Kind, Base, FirstKind, LastKind)
// (each of which expands to NODE(Kind, Base) unless the includer defines it),
// similar to those for AST nodes such as AST/DeclNodes.inc.
18*e8d8bef9SDimitry Andric //
19*e8d8bef9SDimitry Andric // The -gen-clang-syntax-node-classes backend produces definitions for the
20*e8d8bef9SDimitry Andric // syntax::Node subclasses (except those marked as External).
21*e8d8bef9SDimitry Andric //
22*e8d8bef9SDimitry Andric // In future, another backend will encode the structure of the various node
23*e8d8bef9SDimitry Andric // types in tables so their invariants can be checked and enforced.
24*e8d8bef9SDimitry Andric //
25*e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
#include "TableGenBackends.h"

#include <deque>

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
35*e8d8bef9SDimitry Andric 
36*e8d8bef9SDimitry Andric namespace {
37*e8d8bef9SDimitry Andric using llvm::formatv;
38*e8d8bef9SDimitry Andric 
39*e8d8bef9SDimitry Andric // The class hierarchy of Node types.
40*e8d8bef9SDimitry Andric // We assemble this in order to be able to define the NodeKind enum in a
41*e8d8bef9SDimitry Andric // stable and useful way, where abstract Node subclasses correspond to ranges.
42*e8d8bef9SDimitry Andric class Hierarchy {
43*e8d8bef9SDimitry Andric public:
44*e8d8bef9SDimitry Andric   Hierarchy(const llvm::RecordKeeper &Records) {
45*e8d8bef9SDimitry Andric     for (llvm::Record *T : Records.getAllDerivedDefinitions("NodeType"))
46*e8d8bef9SDimitry Andric       add(T);
47*e8d8bef9SDimitry Andric     for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
48*e8d8bef9SDimitry Andric       if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
49*e8d8bef9SDimitry Andric         link(Derived, Base);
50*e8d8bef9SDimitry Andric     for (NodeType &N : AllTypes) {
51*e8d8bef9SDimitry Andric       llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
52*e8d8bef9SDimitry Andric         return L->Record->getName() < R->Record->getName();
53*e8d8bef9SDimitry Andric       });
54*e8d8bef9SDimitry Andric       // Alternatives nodes must have subclasses, External nodes may do.
55*e8d8bef9SDimitry Andric       assert(N.Record->isSubClassOf("Alternatives") ||
56*e8d8bef9SDimitry Andric              N.Record->isSubClassOf("External") || N.Derived.empty());
57*e8d8bef9SDimitry Andric       assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
58*e8d8bef9SDimitry Andric     }
59*e8d8bef9SDimitry Andric   }
60*e8d8bef9SDimitry Andric 
61*e8d8bef9SDimitry Andric   struct NodeType {
62*e8d8bef9SDimitry Andric     const llvm::Record *Record = nullptr;
63*e8d8bef9SDimitry Andric     const NodeType *Base = nullptr;
64*e8d8bef9SDimitry Andric     std::vector<const NodeType *> Derived;
65*e8d8bef9SDimitry Andric     llvm::StringRef name() const { return Record->getName(); }
66*e8d8bef9SDimitry Andric   };
67*e8d8bef9SDimitry Andric 
68*e8d8bef9SDimitry Andric   NodeType &get(llvm::StringRef Name = "Node") {
69*e8d8bef9SDimitry Andric     auto NI = ByName.find(Name);
70*e8d8bef9SDimitry Andric     assert(NI != ByName.end() && "no such node");
71*e8d8bef9SDimitry Andric     return *NI->second;
72*e8d8bef9SDimitry Andric   }
73*e8d8bef9SDimitry Andric 
74*e8d8bef9SDimitry Andric   // Traverse the hierarchy in pre-order (base classes before derived).
75*e8d8bef9SDimitry Andric   void visit(llvm::function_ref<void(const NodeType &)> CB,
76*e8d8bef9SDimitry Andric              const NodeType *Start = nullptr) {
77*e8d8bef9SDimitry Andric     if (Start == nullptr)
78*e8d8bef9SDimitry Andric       Start = &get();
79*e8d8bef9SDimitry Andric     CB(*Start);
80*e8d8bef9SDimitry Andric     for (const NodeType *D : Start->Derived)
81*e8d8bef9SDimitry Andric       visit(CB, D);
82*e8d8bef9SDimitry Andric   }
83*e8d8bef9SDimitry Andric 
84*e8d8bef9SDimitry Andric private:
85*e8d8bef9SDimitry Andric   void add(const llvm::Record *R) {
86*e8d8bef9SDimitry Andric     AllTypes.emplace_back();
87*e8d8bef9SDimitry Andric     AllTypes.back().Record = R;
88*e8d8bef9SDimitry Andric     bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
89*e8d8bef9SDimitry Andric     assert(Inserted && "Duplicate node name");
90*e8d8bef9SDimitry Andric     (void)Inserted;
91*e8d8bef9SDimitry Andric   }
92*e8d8bef9SDimitry Andric 
93*e8d8bef9SDimitry Andric   void link(const llvm::Record *Derived, const llvm::Record *Base) {
94*e8d8bef9SDimitry Andric     auto &CN = get(Derived->getName()), &PN = get(Base->getName());
95*e8d8bef9SDimitry Andric     assert(CN.Base == nullptr && "setting base twice");
96*e8d8bef9SDimitry Andric     PN.Derived.push_back(&CN);
97*e8d8bef9SDimitry Andric     CN.Base = &PN;
98*e8d8bef9SDimitry Andric   }
99*e8d8bef9SDimitry Andric 
100*e8d8bef9SDimitry Andric   std::deque<NodeType> AllTypes;
101*e8d8bef9SDimitry Andric   llvm::DenseMap<llvm::StringRef, NodeType *> ByName;
102*e8d8bef9SDimitry Andric };
103*e8d8bef9SDimitry Andric 
104*e8d8bef9SDimitry Andric const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
105*e8d8bef9SDimitry Andric   return N.Derived.empty() ? N : firstConcrete(*N.Derived.front());
106*e8d8bef9SDimitry Andric }
107*e8d8bef9SDimitry Andric const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
108*e8d8bef9SDimitry Andric   return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
109*e8d8bef9SDimitry Andric }
110*e8d8bef9SDimitry Andric 
111*e8d8bef9SDimitry Andric struct SyntaxConstraint {
112*e8d8bef9SDimitry Andric   SyntaxConstraint(const llvm::Record &R) {
113*e8d8bef9SDimitry Andric     if (R.isSubClassOf("Optional")) {
114*e8d8bef9SDimitry Andric       *this = SyntaxConstraint(*R.getValueAsDef("inner"));
115*e8d8bef9SDimitry Andric     } else if (R.isSubClassOf("AnyToken")) {
116*e8d8bef9SDimitry Andric       NodeType = "Leaf";
117*e8d8bef9SDimitry Andric     } else if (R.isSubClassOf("NodeType")) {
118*e8d8bef9SDimitry Andric       NodeType = R.getName().str();
119*e8d8bef9SDimitry Andric     } else {
120*e8d8bef9SDimitry Andric       assert(false && "Unhandled Syntax kind");
121*e8d8bef9SDimitry Andric     }
122*e8d8bef9SDimitry Andric   }
123*e8d8bef9SDimitry Andric 
124*e8d8bef9SDimitry Andric   std::string NodeType;
125*e8d8bef9SDimitry Andric   // optional and leaf types also go here, once we want to use them.
126*e8d8bef9SDimitry Andric };
127*e8d8bef9SDimitry Andric 
128*e8d8bef9SDimitry Andric } // namespace
129*e8d8bef9SDimitry Andric 
130*e8d8bef9SDimitry Andric void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
131*e8d8bef9SDimitry Andric                                     llvm::raw_ostream &OS) {
132*e8d8bef9SDimitry Andric   llvm::emitSourceFileHeader("Syntax tree node list", OS);
133*e8d8bef9SDimitry Andric   Hierarchy H(Records);
134*e8d8bef9SDimitry Andric   OS << R"cpp(
135*e8d8bef9SDimitry Andric #ifndef NODE
136*e8d8bef9SDimitry Andric #define NODE(Kind, Base)
137*e8d8bef9SDimitry Andric #endif
138*e8d8bef9SDimitry Andric 
139*e8d8bef9SDimitry Andric #ifndef CONCRETE_NODE
140*e8d8bef9SDimitry Andric #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
141*e8d8bef9SDimitry Andric #endif
142*e8d8bef9SDimitry Andric 
143*e8d8bef9SDimitry Andric #ifndef ABSTRACT_NODE
144*e8d8bef9SDimitry Andric #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
145*e8d8bef9SDimitry Andric #endif
146*e8d8bef9SDimitry Andric 
147*e8d8bef9SDimitry Andric )cpp";
148*e8d8bef9SDimitry Andric   H.visit([&](const Hierarchy::NodeType &N) {
149*e8d8bef9SDimitry Andric     // Don't emit ABSTRACT_NODE for node itself, which has no parent.
150*e8d8bef9SDimitry Andric     if (N.Base == nullptr)
151*e8d8bef9SDimitry Andric       return;
152*e8d8bef9SDimitry Andric     if (N.Derived.empty())
153*e8d8bef9SDimitry Andric       OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
154*e8d8bef9SDimitry Andric     else
155*e8d8bef9SDimitry Andric       OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
156*e8d8bef9SDimitry Andric                     N.Base->name(), firstConcrete(N).name(),
157*e8d8bef9SDimitry Andric                     lastConcrete(N).name());
158*e8d8bef9SDimitry Andric   });
159*e8d8bef9SDimitry Andric   OS << R"cpp(
160*e8d8bef9SDimitry Andric #undef NODE
161*e8d8bef9SDimitry Andric #undef CONCRETE_NODE
162*e8d8bef9SDimitry Andric #undef ABSTRACT_NODE
163*e8d8bef9SDimitry Andric )cpp";
164*e8d8bef9SDimitry Andric }
165*e8d8bef9SDimitry Andric 
166*e8d8bef9SDimitry Andric // Format a documentation string as a C++ comment.
167*e8d8bef9SDimitry Andric // Trims leading whitespace handling since comments come from a TableGen file:
168*e8d8bef9SDimitry Andric //    documentation = [{
169*e8d8bef9SDimitry Andric //      This is a widget. Example:
170*e8d8bef9SDimitry Andric //        widget.explode()
171*e8d8bef9SDimitry Andric //    }];
172*e8d8bef9SDimitry Andric // and should be formatted as:
173*e8d8bef9SDimitry Andric //    /// This is a widget. Example:
174*e8d8bef9SDimitry Andric //    ///   widget.explode()
175*e8d8bef9SDimitry Andric // Leading and trailing whitespace lines are stripped.
176*e8d8bef9SDimitry Andric // The indentation of the first line is stripped from all lines.
177*e8d8bef9SDimitry Andric static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
178*e8d8bef9SDimitry Andric   Doc = Doc.rtrim();
179*e8d8bef9SDimitry Andric   llvm::StringRef Line;
180*e8d8bef9SDimitry Andric   while (Line.trim().empty() && !Doc.empty())
181*e8d8bef9SDimitry Andric     std::tie(Line, Doc) = Doc.split('\n');
182*e8d8bef9SDimitry Andric   llvm::StringRef Indent = Line.take_while(llvm::isSpace);
183*e8d8bef9SDimitry Andric   for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
184*e8d8bef9SDimitry Andric     Line.consume_front(Indent);
185*e8d8bef9SDimitry Andric     OS << "/// " << Line << "\n";
186*e8d8bef9SDimitry Andric   }
187*e8d8bef9SDimitry Andric }
188*e8d8bef9SDimitry Andric 
189*e8d8bef9SDimitry Andric void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
190*e8d8bef9SDimitry Andric                                        llvm::raw_ostream &OS) {
191*e8d8bef9SDimitry Andric   llvm::emitSourceFileHeader("Syntax tree node list", OS);
192*e8d8bef9SDimitry Andric   Hierarchy H(Records);
193*e8d8bef9SDimitry Andric 
194*e8d8bef9SDimitry Andric   OS << "\n// Forward-declare node types so we don't have to carefully "
195*e8d8bef9SDimitry Andric         "sequence definitions.\n";
196*e8d8bef9SDimitry Andric   H.visit([&](const Hierarchy::NodeType &N) {
197*e8d8bef9SDimitry Andric     OS << "class " << N.name() << ";\n";
198*e8d8bef9SDimitry Andric   });
199*e8d8bef9SDimitry Andric 
200*e8d8bef9SDimitry Andric   OS << "\n// Node definitions\n\n";
201*e8d8bef9SDimitry Andric   H.visit([&](const Hierarchy::NodeType &N) {
202*e8d8bef9SDimitry Andric     if (N.Record->isSubClassOf("External"))
203*e8d8bef9SDimitry Andric       return;
204*e8d8bef9SDimitry Andric     printDoc(N.Record->getValueAsString("documentation"), OS);
205*e8d8bef9SDimitry Andric     OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
206*e8d8bef9SDimitry Andric                   N.Derived.empty() ? " final" : "", N.Base->name());
207*e8d8bef9SDimitry Andric 
208*e8d8bef9SDimitry Andric     // Constructor.
209*e8d8bef9SDimitry Andric     if (N.Derived.empty())
210*e8d8bef9SDimitry Andric       OS << formatv("public:\n  {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
211*e8d8bef9SDimitry Andric                     N.Base->name());
212*e8d8bef9SDimitry Andric     else
213*e8d8bef9SDimitry Andric       OS << formatv("protected:\n  {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
214*e8d8bef9SDimitry Andric                     N.name(), N.Base->name());
215*e8d8bef9SDimitry Andric 
216*e8d8bef9SDimitry Andric     if (N.Record->isSubClassOf("Sequence")) {
217*e8d8bef9SDimitry Andric       // Getters for sequence elements.
218*e8d8bef9SDimitry Andric       for (const auto &C : N.Record->getValueAsListOfDefs("children")) {
219*e8d8bef9SDimitry Andric         assert(C->isSubClassOf("Role"));
220*e8d8bef9SDimitry Andric         llvm::StringRef Role = C->getValueAsString("role");
221*e8d8bef9SDimitry Andric         SyntaxConstraint Constraint(*C->getValueAsDef("syntax"));
222*e8d8bef9SDimitry Andric         for (const char *Const : {"", "const "})
223*e8d8bef9SDimitry Andric           OS << formatv(
224*e8d8bef9SDimitry Andric               "  {2}{1} *get{0}() {2} {{\n"
225*e8d8bef9SDimitry Andric               "    return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
226*e8d8bef9SDimitry Andric               "  }\n",
227*e8d8bef9SDimitry Andric               Role, Constraint.NodeType, Const);
228*e8d8bef9SDimitry Andric       }
229*e8d8bef9SDimitry Andric     }
230*e8d8bef9SDimitry Andric 
231*e8d8bef9SDimitry Andric     // classof. FIXME: move definition inline once ~all nodes are generated.
232*e8d8bef9SDimitry Andric     OS << "  static bool classof(const Node *N);\n";
233*e8d8bef9SDimitry Andric 
234*e8d8bef9SDimitry Andric     OS << "};\n\n";
235*e8d8bef9SDimitry Andric   });
236*e8d8bef9SDimitry Andric }
237