xref: /llvm-project/clang/utils/TableGen/ClangSyntaxEmitter.cpp (revision 63aa8cf6becbeb4983e3d1a7fa3cd8a7c7147118)
1 //===-- ClangSyntaxEmitter.cpp - Generate clang Syntax Tree nodes ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
6 // See https://llvm.org/LICENSE.txt for license information.
7 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //
9 //===----------------------------------------------------------------------===//
10 //
11 // These backends consume the definitions of Syntax Tree nodes.
12 // See clang/include/clang/Tooling/Syntax/{Syntax,Nodes}.td
13 //
// The -gen-clang-syntax-node-list backend produces a .inc with macro calls
//   CONCRETE_NODE(Kind, BaseKind)
//   ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
// (each of which defaults to NODE(Kind, BaseKind) unless defined by the
// includer), similar to those for AST nodes such as AST/DeclNodes.inc.
18 //
19 // The -gen-clang-syntax-node-classes backend produces definitions for the
20 // syntax::Node subclasses (except those marked as External).
21 //
22 // In future, another backend will encode the structure of the various node
23 // types in tables so their invariants can be checked and enforced.
24 //
25 //===----------------------------------------------------------------------===//
#include "TableGenBackends.h"

#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"

#include <deque>
35 
36 using namespace llvm;
37 
38 namespace {
39 
40 // The class hierarchy of Node types.
41 // We assemble this in order to be able to define the NodeKind enum in a
42 // stable and useful way, where abstract Node subclasses correspond to ranges.
43 class Hierarchy {
44 public:
45   Hierarchy(const RecordKeeper &Records) {
46     for (const Record *T : Records.getAllDerivedDefinitions("NodeType"))
47       add(T);
48     for (const Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
49       if (const Record *Base = Derived->getValueAsOptionalDef("base"))
50         link(Derived, Base);
51     for (NodeType &N : AllTypes) {
52       sort(N.Derived, [](const NodeType *L, const NodeType *R) {
53         return L->Rec->getName() < R->Rec->getName();
54       });
55       // Alternatives nodes must have subclasses, External nodes may do.
56       assert(N.Rec->isSubClassOf("Alternatives") ||
57              N.Rec->isSubClassOf("External") || N.Derived.empty());
58       assert(!N.Rec->isSubClassOf("Alternatives") || !N.Derived.empty());
59     }
60   }
61 
62   struct NodeType {
63     const Record *Rec = nullptr;
64     const NodeType *Base = nullptr;
65     std::vector<const NodeType *> Derived;
66     StringRef name() const { return Rec->getName(); }
67   };
68 
69   NodeType &get(StringRef Name = "Node") {
70     auto NI = ByName.find(Name);
71     assert(NI != ByName.end() && "no such node");
72     return *NI->second;
73   }
74 
75   // Traverse the hierarchy in pre-order (base classes before derived).
76   void visit(function_ref<void(const NodeType &)> CB,
77              const NodeType *Start = nullptr) {
78     if (Start == nullptr)
79       Start = &get();
80     CB(*Start);
81     for (const NodeType *D : Start->Derived)
82       visit(CB, D);
83   }
84 
85 private:
86   void add(const Record *R) {
87     AllTypes.emplace_back();
88     AllTypes.back().Rec = R;
89     bool Inserted = ByName.try_emplace(R->getName(), &AllTypes.back()).second;
90     assert(Inserted && "Duplicate node name");
91     (void)Inserted;
92   }
93 
94   void link(const Record *Derived, const Record *Base) {
95     auto &CN = get(Derived->getName()), &PN = get(Base->getName());
96     assert(CN.Base == nullptr && "setting base twice");
97     PN.Derived.push_back(&CN);
98     CN.Base = &PN;
99   }
100 
101   std::deque<NodeType> AllTypes;
102   DenseMap<StringRef, NodeType *> ByName;
103 };
104 
105 const Hierarchy::NodeType &firstConcrete(const Hierarchy::NodeType &N) {
106   return N.Derived.empty() ? N : firstConcrete(*N.Derived.front());
107 }
108 const Hierarchy::NodeType &lastConcrete(const Hierarchy::NodeType &N) {
109   return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
110 }
111 
112 struct SyntaxConstraint {
113   SyntaxConstraint(const Record &R) {
114     if (R.isSubClassOf("Optional")) {
115       *this = SyntaxConstraint(*R.getValueAsDef("inner"));
116     } else if (R.isSubClassOf("AnyToken")) {
117       NodeType = "Leaf";
118     } else if (R.isSubClassOf("NodeType")) {
119       NodeType = R.getName();
120     } else {
121       assert(false && "Unhandled Syntax kind");
122     }
123   }
124 
125   StringRef NodeType;
126   // optional and leaf types also go here, once we want to use them.
127 };
128 
129 } // namespace
130 
131 void clang::EmitClangSyntaxNodeList(const RecordKeeper &Records,
132                                     raw_ostream &OS) {
133   emitSourceFileHeader("Syntax tree node list", OS, Records);
134   Hierarchy H(Records);
135   OS << R"cpp(
136 #ifndef NODE
137 #define NODE(Kind, Base)
138 #endif
139 
140 #ifndef CONCRETE_NODE
141 #define CONCRETE_NODE(Kind, Base) NODE(Kind, Base)
142 #endif
143 
144 #ifndef ABSTRACT_NODE
145 #define ABSTRACT_NODE(Kind, Base, First, Last) NODE(Kind, Base)
146 #endif
147 
148 )cpp";
149   H.visit([&](const Hierarchy::NodeType &N) {
150     // Don't emit ABSTRACT_NODE for node itself, which has no parent.
151     if (N.Base == nullptr)
152       return;
153     if (N.Derived.empty())
154       OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
155     else
156       OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
157                     N.Base->name(), firstConcrete(N).name(),
158                     lastConcrete(N).name());
159   });
160   OS << R"cpp(
161 #undef NODE
162 #undef CONCRETE_NODE
163 #undef ABSTRACT_NODE
164 )cpp";
165 }
166 
167 // Format a documentation string as a C++ comment.
168 // Trims leading whitespace handling since comments come from a TableGen file:
169 //    documentation = [{
170 //      This is a widget. Example:
171 //        widget.explode()
172 //    }];
173 // and should be formatted as:
174 //    /// This is a widget. Example:
175 //    ///   widget.explode()
176 // Leading and trailing whitespace lines are stripped.
177 // The indentation of the first line is stripped from all lines.
178 static void printDoc(StringRef Doc, raw_ostream &OS) {
179   Doc = Doc.rtrim();
180   StringRef Line;
181   while (Line.trim().empty() && !Doc.empty())
182     std::tie(Line, Doc) = Doc.split('\n');
183   StringRef Indent = Line.take_while(isSpace);
184   for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
185     Line.consume_front(Indent);
186     OS << "/// " << Line << "\n";
187   }
188 }
189 
190 void clang::EmitClangSyntaxNodeClasses(const RecordKeeper &Records,
191                                        raw_ostream &OS) {
192   emitSourceFileHeader("Syntax tree node list", OS, Records);
193   Hierarchy H(Records);
194 
195   OS << "\n// Forward-declare node types so we don't have to carefully "
196         "sequence definitions.\n";
197   H.visit([&](const Hierarchy::NodeType &N) {
198     OS << "class " << N.name() << ";\n";
199   });
200 
201   OS << "\n// Node definitions\n\n";
202   H.visit([&](const Hierarchy::NodeType &N) {
203     if (N.Rec->isSubClassOf("External"))
204       return;
205     printDoc(N.Rec->getValueAsString("documentation"), OS);
206     OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
207                   N.Derived.empty() ? " final" : "", N.Base->name());
208 
209     // Constructor.
210     if (N.Derived.empty())
211       OS << formatv("public:\n  {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
212                     N.Base->name());
213     else
214       OS << formatv("protected:\n  {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
215                     N.name(), N.Base->name());
216 
217     if (N.Rec->isSubClassOf("Sequence")) {
218       // Getters for sequence elements.
219       for (const auto &C : N.Rec->getValueAsListOfDefs("children")) {
220         assert(C->isSubClassOf("Role"));
221         StringRef Role = C->getValueAsString("role");
222         SyntaxConstraint Constraint(*C->getValueAsDef("syntax"));
223         for (const char *Const : {"", "const "})
224           OS << formatv(
225               "  {2}{1} *get{0}() {2} {{\n"
226               "    return llvm::cast_or_null<{1}>(findChild(NodeRole::{0}));\n"
227               "  }\n",
228               Role, Constraint.NodeType, Const);
229       }
230     }
231 
232     // classof. FIXME: move definition inline once ~all nodes are generated.
233     OS << "  static bool classof(const Node *N);\n";
234 
235     OS << "};\n\n";
236   });
237 }
238