xref: /llvm-project/clang-tools-extra/clangd/DumpAST.cpp (revision fa6580470547411667b866362941db0b02e25578)
1 //===--- DumpAST.cpp - Serialize clang AST to LSP -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "DumpAST.h"
10 #include "Protocol.h"
11 #include "SourceCode.h"
12 #include "support/Logger.h"
13 #include "clang/AST/ASTTypeTraits.h"
14 #include "clang/AST/Expr.h"
15 #include "clang/AST/ExprCXX.h"
16 #include "clang/AST/NestedNameSpecifier.h"
17 #include "clang/AST/PrettyPrinter.h"
18 #include "clang/AST/RecursiveASTVisitor.h"
19 #include "clang/AST/TextNodeDumper.h"
20 #include "clang/AST/Type.h"
21 #include "clang/AST/TypeLoc.h"
22 #include "clang/Basic/Specifiers.h"
23 #include "clang/Tooling/Syntax/Tokens.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include <optional>
27 
28 namespace clang {
29 namespace clangd {
30 namespace {
31 
32 using llvm::raw_ostream;
33 template <typename Print> std::string toString(const Print &C) {
34   std::string Result;
35   llvm::raw_string_ostream OS(Result);
36   C(OS);
37   return std::move(OS.str());
38 }
39 
40 bool isInjectedClassName(Decl *D) {
41   if (const auto *CRD = llvm::dyn_cast<CXXRecordDecl>(D))
42     return CRD->isInjectedClassName();
43   return false;
44 }
45 
46 class DumpVisitor : public RecursiveASTVisitor<DumpVisitor> {
47   using Base = RecursiveASTVisitor<DumpVisitor>;
48 
49   const syntax::TokenBuffer &Tokens;
50   const ASTContext &Ctx;
51 
52   // Pointers are into 'children' vector.
53   // They remain valid because while a node is on the stack we only add
54   // descendants, not siblings.
55   std::vector<ASTNode *> Stack;
56 
57   // Generic logic used to handle traversal of all node kinds.
58 
59   template <typename T>
60   bool traverseNodePre(llvm::StringRef Role, const T &Node) {
61     if (Stack.empty()) {
62       assert(Root.role.empty());
63       Stack.push_back(&Root);
64     } else {
65       Stack.back()->children.emplace_back();
66       Stack.push_back(&Stack.back()->children.back());
67     }
68     auto &N = *Stack.back();
69     N.role = Role.str();
70     N.kind = getKind(Node);
71     N.detail = getDetail(Node);
72     N.range = getRange(Node);
73     N.arcana = getArcana(Node);
74     return true;
75   }
76   bool traverseNodePost() {
77     assert(!Stack.empty());
78     Stack.pop_back();
79     return true;
80   }
81   template <typename T, typename Callable>
82   bool traverseNode(llvm::StringRef Role, const T &Node, const Callable &Body) {
83     traverseNodePre(Role, Node);
84     Body();
85     return traverseNodePost();
86   }
87 
88   // Range: most nodes have getSourceRange(), with a couple of exceptions.
89   // We only return it if it's valid at both ends and there are no macros.
90 
91   template <typename T> std::optional<Range> getRange(const T &Node) {
92     SourceRange SR = getSourceRange(Node);
93     auto Spelled = Tokens.spelledForExpanded(Tokens.expandedTokens(SR));
94     if (!Spelled)
95       return std::nullopt;
96     return halfOpenToRange(
97         Tokens.sourceManager(),
98         CharSourceRange::getCharRange(Spelled->front().location(),
99                                       Spelled->back().endLocation()));
100   }
101   template <typename T, typename = decltype(std::declval<T>().getSourceRange())>
102   SourceRange getSourceRange(const T &Node) {
103     return Node.getSourceRange();
104   }
105   template <typename T,
106             typename = decltype(std::declval<T *>()->getSourceRange())>
107   SourceRange getSourceRange(const T *Node) {
108     return Node->getSourceRange();
109   }
110   // TemplateName doesn't have a real Loc node type.
111   SourceRange getSourceRange(const TemplateName &Node) { return SourceRange(); }
112   // Attr just uses a weird method name. Maybe we should fix it instead?
113   SourceRange getSourceRange(const Attr *Node) { return Node->getRange(); }
114 
115   // Kind is usually the class name, without the suffix ("Type" etc).
116   // Where there's a set of variants instead, we use the 'Kind' enum values.
117 
118   std::string getKind(const Decl *D) { return D->getDeclKindName(); }
119   std::string getKind(const Stmt *S) {
120     std::string Result = S->getStmtClassName();
121     if (llvm::StringRef(Result).ends_with("Stmt") ||
122         llvm::StringRef(Result).ends_with("Expr"))
123       Result.resize(Result.size() - 4);
124     return Result;
125   }
126   std::string getKind(const TypeLoc &TL) {
127     std::string Result;
128     if (TL.getTypeLocClass() == TypeLoc::Qualified)
129       return "Qualified";
130     return TL.getType()->getTypeClassName();
131   }
132   std::string getKind(const TemplateArgumentLoc &TAL) {
133     switch (TAL.getArgument().getKind()) {
134 #define TEMPLATE_ARGUMENT_KIND(X)                                              \
135   case TemplateArgument::X:                                                    \
136     return #X
137       TEMPLATE_ARGUMENT_KIND(Null);
138       TEMPLATE_ARGUMENT_KIND(NullPtr);
139       TEMPLATE_ARGUMENT_KIND(Expression);
140       TEMPLATE_ARGUMENT_KIND(Integral);
141       TEMPLATE_ARGUMENT_KIND(Pack);
142       TEMPLATE_ARGUMENT_KIND(Type);
143       TEMPLATE_ARGUMENT_KIND(Declaration);
144       TEMPLATE_ARGUMENT_KIND(Template);
145       TEMPLATE_ARGUMENT_KIND(TemplateExpansion);
146       TEMPLATE_ARGUMENT_KIND(StructuralValue);
147 #undef TEMPLATE_ARGUMENT_KIND
148     }
149     llvm_unreachable("Unhandled ArgKind enum");
150   }
151   std::string getKind(const NestedNameSpecifierLoc &NNSL) {
152     assert(NNSL.getNestedNameSpecifier());
153     switch (NNSL.getNestedNameSpecifier()->getKind()) {
154 #define NNS_KIND(X)                                                            \
155   case NestedNameSpecifier::X:                                                 \
156     return #X
157       NNS_KIND(Identifier);
158       NNS_KIND(Namespace);
159       NNS_KIND(TypeSpec);
160       NNS_KIND(TypeSpecWithTemplate);
161       NNS_KIND(Global);
162       NNS_KIND(Super);
163       NNS_KIND(NamespaceAlias);
164 #undef NNS_KIND
165     }
166     llvm_unreachable("Unhandled SpecifierKind enum");
167   }
168   std::string getKind(const CXXCtorInitializer *CCI) {
169     if (CCI->isBaseInitializer())
170       return "BaseInitializer";
171     if (CCI->isDelegatingInitializer())
172       return "DelegatingInitializer";
173     if (CCI->isAnyMemberInitializer())
174       return "MemberInitializer";
175     llvm_unreachable("Unhandled CXXCtorInitializer type");
176   }
177   std::string getKind(const TemplateName &TN) {
178     switch (TN.getKind()) {
179 #define TEMPLATE_KIND(X)                                                       \
180   case TemplateName::X:                                                        \
181     return #X;
182       TEMPLATE_KIND(Template);
183       TEMPLATE_KIND(OverloadedTemplate);
184       TEMPLATE_KIND(AssumedTemplate);
185       TEMPLATE_KIND(QualifiedTemplate);
186       TEMPLATE_KIND(DependentTemplate);
187       TEMPLATE_KIND(SubstTemplateTemplateParm);
188       TEMPLATE_KIND(SubstTemplateTemplateParmPack);
189       TEMPLATE_KIND(UsingTemplate);
190       TEMPLATE_KIND(DeducedTemplate);
191 #undef TEMPLATE_KIND
192     }
193     llvm_unreachable("Unhandled NameKind enum");
194   }
195   std::string getKind(const Attr *A) {
196     switch (A->getKind()) {
197 #define ATTR(X)                                                                \
198   case attr::X:                                                                \
199     return #X;
200 #include "clang/Basic/AttrList.inc"
201 #undef ATTR
202     }
203     llvm_unreachable("Unhandled attr::Kind enum");
204   }
205   std::string getKind(const CXXBaseSpecifier &CBS) {
206     // There aren't really any variants of CXXBaseSpecifier.
207     // To avoid special cases in the API/UI, use public/private as the kind.
208     return getAccessSpelling(CBS.getAccessSpecifier()).str();
209   }
210   std::string getKind(const ConceptReference *CR) {
211     // Again there are no variants here.
212     // Kind is "Concept", role is "reference"
213     return "Concept";
214   }
215 
216   // Detail is the single most important fact about the node.
217   // Often this is the name, sometimes a "kind" enum like operators or casts.
218   // We should avoid unbounded text, like dumping parameter lists.
219 
220   std::string getDetail(const Decl *D) {
221     const auto *ND = dyn_cast<NamedDecl>(D);
222     if (!ND || llvm::isa_and_nonnull<CXXConstructorDecl>(ND->getAsFunction()) ||
223         isa<CXXDestructorDecl>(ND))
224       return "";
225     std::string Name = toString([&](raw_ostream &OS) { ND->printName(OS); });
226     if (Name.empty())
227       return "(anonymous)";
228     return Name;
229   }
230   std::string getDetail(const Stmt *S) {
231     if (const auto *DRE = dyn_cast<DeclRefExpr>(S))
232       return DRE->getNameInfo().getAsString();
233     if (const auto *DSDRE = dyn_cast<DependentScopeDeclRefExpr>(S))
234       return DSDRE->getNameInfo().getAsString();
235     if (const auto *ME = dyn_cast<MemberExpr>(S))
236       return ME->getMemberNameInfo().getAsString();
237     if (const auto *CE = dyn_cast<CastExpr>(S))
238       return CE->getCastKindName();
239     if (const auto *BO = dyn_cast<BinaryOperator>(S))
240       return BO->getOpcodeStr().str();
241     if (const auto *UO = dyn_cast<UnaryOperator>(S))
242       return UnaryOperator::getOpcodeStr(UO->getOpcode()).str();
243     if (const auto *CCO = dyn_cast<CXXConstructExpr>(S))
244       return CCO->getConstructor()->getNameAsString();
245     if (const auto *CTE = dyn_cast<CXXThisExpr>(S)) {
246       bool Const = CTE->getType()->getPointeeType().isLocalConstQualified();
247       if (CTE->isImplicit())
248         return Const ? "const, implicit" : "implicit";
249       if (Const)
250         return "const";
251       return "";
252     }
253     if (isa<IntegerLiteral, FloatingLiteral, FixedPointLiteral,
254             CharacterLiteral, ImaginaryLiteral, CXXBoolLiteralExpr>(S))
255       return toString([&](raw_ostream &OS) {
256         S->printPretty(OS, nullptr, Ctx.getPrintingPolicy());
257       });
258     if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(S))
259       return MTE->isBoundToLvalueReference() ? "lvalue" : "rvalue";
260     return "";
261   }
262   std::string getDetail(const TypeLoc &TL) {
263     if (TL.getType().hasLocalQualifiers())
264       return TL.getType().getLocalQualifiers().getAsString(
265           Ctx.getPrintingPolicy());
266     if (const auto *TT = dyn_cast<TagType>(TL.getTypePtr()))
267       return getDetail(TT->getDecl());
268     if (const auto *DT = dyn_cast<DeducedType>(TL.getTypePtr()))
269       if (DT->isDeduced())
270         return DT->getDeducedType().getAsString(Ctx.getPrintingPolicy());
271     if (const auto *BT = dyn_cast<BuiltinType>(TL.getTypePtr()))
272       return BT->getName(Ctx.getPrintingPolicy()).str();
273     if (const auto *TTPT = dyn_cast<TemplateTypeParmType>(TL.getTypePtr()))
274       return getDetail(TTPT->getDecl());
275     if (const auto *TT = dyn_cast<TypedefType>(TL.getTypePtr()))
276       return getDetail(TT->getDecl());
277     return "";
278   }
279   std::string getDetail(const NestedNameSpecifierLoc &NNSL) {
280     const auto &NNS = *NNSL.getNestedNameSpecifier();
281     switch (NNS.getKind()) {
282     case NestedNameSpecifier::Identifier:
283       return NNS.getAsIdentifier()->getName().str() + "::";
284     case NestedNameSpecifier::Namespace:
285       return NNS.getAsNamespace()->getNameAsString() + "::";
286     case NestedNameSpecifier::NamespaceAlias:
287       return NNS.getAsNamespaceAlias()->getNameAsString() + "::";
288     default:
289       return "";
290     }
291   }
292   std::string getDetail(const CXXCtorInitializer *CCI) {
293     if (FieldDecl *FD = CCI->getAnyMember())
294       return getDetail(FD);
295     if (TypeLoc TL = CCI->getBaseClassLoc())
296       return getDetail(TL);
297     return "";
298   }
299   std::string getDetail(const TemplateArgumentLoc &TAL) {
300     if (TAL.getArgument().getKind() == TemplateArgument::Integral)
301       return toString(TAL.getArgument().getAsIntegral(), 10);
302     return "";
303   }
304   std::string getDetail(const TemplateName &TN) {
305     return toString([&](raw_ostream &OS) {
306       TN.print(OS, Ctx.getPrintingPolicy(), TemplateName::Qualified::None);
307     });
308   }
309   std::string getDetail(const Attr *A) {
310     return A->getAttrName() ? A->getNormalizedFullName() : A->getSpelling();
311   }
312   std::string getDetail(const CXXBaseSpecifier &CBS) {
313     return CBS.isVirtual() ? "virtual" : "";
314   }
315   std::string getDetail(const ConceptReference *CR) {
316     return CR->getNamedConcept()->getNameAsString();
317   }
318 
319   /// Arcana is produced by TextNodeDumper, for the types it supports.
320 
321   template <typename Dump> std::string dump(const Dump &D) {
322     return toString([&](raw_ostream &OS) {
323       TextNodeDumper Dumper(OS, Ctx, /*ShowColors=*/false);
324       D(Dumper);
325     });
326   }
327   template <typename T> std::string getArcana(const T &N) {
328     return dump([&](TextNodeDumper &D) { D.Visit(N); });
329   }
330   std::string getArcana(const NestedNameSpecifierLoc &NNS) { return ""; }
331   std::string getArcana(const TemplateName &NNS) { return ""; }
332   std::string getArcana(const CXXBaseSpecifier &CBS) { return ""; }
333   std::string getArcana(const TemplateArgumentLoc &TAL) {
334     return dump([&](TextNodeDumper &D) {
335       D.Visit(TAL.getArgument(), TAL.getSourceRange());
336     });
337   }
338   std::string getArcana(const TypeLoc &TL) {
339     return dump([&](TextNodeDumper &D) { D.Visit(TL.getType()); });
340   }
341 
342 public:
343   ASTNode Root;
344   DumpVisitor(const syntax::TokenBuffer &Tokens, const ASTContext &Ctx)
345       : Tokens(Tokens), Ctx(Ctx) {}
346 
347   // Override traversal to record the nodes we care about.
348   // Generally, these are nodes with position information (TypeLoc, not Type).
349 
350   bool TraverseDecl(Decl *D) {
351     return !D || isInjectedClassName(D) ||
352            traverseNode("declaration", D, [&] { Base::TraverseDecl(D); });
353   }
354   bool TraverseTypeLoc(TypeLoc TL) {
355     return !TL || traverseNode("type", TL, [&] { Base::TraverseTypeLoc(TL); });
356   }
357   bool TraverseTemplateName(const TemplateName &TN) {
358     return traverseNode("template name", TN,
359                         [&] { Base::TraverseTemplateName(TN); });
360   }
361   bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &TAL) {
362     return traverseNode("template argument", TAL,
363                         [&] { Base::TraverseTemplateArgumentLoc(TAL); });
364   }
365   bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNSL) {
366     return !NNSL || traverseNode("specifier", NNSL, [&] {
367       Base::TraverseNestedNameSpecifierLoc(NNSL);
368     });
369   }
370   bool TraverseConstructorInitializer(CXXCtorInitializer *CCI) {
371     return !CCI || traverseNode("constructor initializer", CCI, [&] {
372       Base::TraverseConstructorInitializer(CCI);
373     });
374   }
375   bool TraverseAttr(Attr *A) {
376     return !A || traverseNode("attribute", A, [&] { Base::TraverseAttr(A); });
377   }
378   bool TraverseConceptReference(ConceptReference *C) {
379     return !C || traverseNode("reference", C,
380                               [&] { Base::TraverseConceptReference(C); });
381   }
382   bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &CBS) {
383     return traverseNode("base", CBS,
384                         [&] { Base::TraverseCXXBaseSpecifier(CBS); });
385   }
386   // Stmt is the same, but this form allows the data recursion optimization.
387   bool dataTraverseStmtPre(Stmt *S) {
388     return S && traverseNodePre(isa<Expr>(S) ? "expression" : "statement", S);
389   }
390   bool dataTraverseStmtPost(Stmt *X) { return traverseNodePost(); }
391 
392   // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
393   // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
394   // This means we'd never see 'int' in 'const int'! Work around that here.
395   // (The reason for the behavior is to avoid traversing the nested Type twice,
396   // but we ignore TraverseType anyway).
397   bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QTL) {
398     return TraverseTypeLoc(QTL.getUnqualifiedLoc());
399   }
400   // Uninteresting parts of the AST that don't have locations within them.
401   bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
402   bool TraverseType(QualType) { return true; }
403 
404   // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
405   bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
406     return TraverseStmt(E->getSourceExpr());
407   }
408   // We only want to traverse the *syntactic form* to understand the selection.
409   bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
410     return TraverseStmt(E->getSyntacticForm());
411   }
412 };
413 
414 } // namespace
415 
416 ASTNode dumpAST(const DynTypedNode &N, const syntax::TokenBuffer &Tokens,
417                 const ASTContext &Ctx) {
418   DumpVisitor V(Tokens, Ctx);
419   // DynTypedNode only works with const, RecursiveASTVisitor only non-const :-(
420   if (const auto *D = N.get<Decl>())
421     V.TraverseDecl(const_cast<Decl *>(D));
422   else if (const auto *S = N.get<Stmt>())
423     V.TraverseStmt(const_cast<Stmt *>(S));
424   else if (const auto *NNSL = N.get<NestedNameSpecifierLoc>())
425     V.TraverseNestedNameSpecifierLoc(
426         *const_cast<NestedNameSpecifierLoc *>(NNSL));
427   else if (const auto *NNS = N.get<NestedNameSpecifier>())
428     V.TraverseNestedNameSpecifier(const_cast<NestedNameSpecifier *>(NNS));
429   else if (const auto *TL = N.get<TypeLoc>())
430     V.TraverseTypeLoc(*const_cast<TypeLoc *>(TL));
431   else if (const auto *QT = N.get<QualType>())
432     V.TraverseType(*const_cast<QualType *>(QT));
433   else if (const auto *CCI = N.get<CXXCtorInitializer>())
434     V.TraverseConstructorInitializer(const_cast<CXXCtorInitializer *>(CCI));
435   else if (const auto *TAL = N.get<TemplateArgumentLoc>())
436     V.TraverseTemplateArgumentLoc(*const_cast<TemplateArgumentLoc *>(TAL));
437   else if (const auto *CBS = N.get<CXXBaseSpecifier>())
438     V.TraverseCXXBaseSpecifier(*const_cast<CXXBaseSpecifier *>(CBS));
439   else if (const auto *CR = N.get<ConceptReference>())
440     V.TraverseConceptReference(const_cast<ConceptReference *>(CR));
441   else
442     elog("dumpAST: unhandled DynTypedNode kind {0}",
443          N.getNodeKind().asStringRef());
444   return std::move(V.Root);
445 }
446 
447 } // namespace clangd
448 } // namespace clang
449