1 //===--- DumpAST.cpp - Serialize clang AST to LSP -------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "DumpAST.h" 10 #include "Protocol.h" 11 #include "SourceCode.h" 12 #include "support/Logger.h" 13 #include "clang/AST/ASTTypeTraits.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/AST/NestedNameSpecifier.h" 17 #include "clang/AST/PrettyPrinter.h" 18 #include "clang/AST/RecursiveASTVisitor.h" 19 #include "clang/AST/TextNodeDumper.h" 20 #include "clang/AST/Type.h" 21 #include "clang/AST/TypeLoc.h" 22 #include "clang/Basic/Specifiers.h" 23 #include "clang/Tooling/Syntax/Tokens.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/Support/raw_ostream.h" 26 #include <optional> 27 28 namespace clang { 29 namespace clangd { 30 namespace { 31 32 using llvm::raw_ostream; 33 template <typename Print> std::string toString(const Print &C) { 34 std::string Result; 35 llvm::raw_string_ostream OS(Result); 36 C(OS); 37 return std::move(OS.str()); 38 } 39 40 bool isInjectedClassName(Decl *D) { 41 if (const auto *CRD = llvm::dyn_cast<CXXRecordDecl>(D)) 42 return CRD->isInjectedClassName(); 43 return false; 44 } 45 46 class DumpVisitor : public RecursiveASTVisitor<DumpVisitor> { 47 using Base = RecursiveASTVisitor<DumpVisitor>; 48 49 const syntax::TokenBuffer &Tokens; 50 const ASTContext &Ctx; 51 52 // Pointers are into 'children' vector. 53 // They remain valid because while a node is on the stack we only add 54 // descendants, not siblings. 55 std::vector<ASTNode *> Stack; 56 57 // Generic logic used to handle traversal of all node kinds. 58 59 template <typename T> 60 bool traverseNodePre(llvm::StringRef Role, const T &Node) { 61 if (Stack.empty()) { 62 assert(Root.role.empty()); 63 Stack.push_back(&Root); 64 } else { 65 Stack.back()->children.emplace_back(); 66 Stack.push_back(&Stack.back()->children.back()); 67 } 68 auto &N = *Stack.back(); 69 N.role = Role.str(); 70 N.kind = getKind(Node); 71 N.detail = getDetail(Node); 72 N.range = getRange(Node); 73 N.arcana = getArcana(Node); 74 return true; 75 } 76 bool traverseNodePost() { 77 assert(!Stack.empty()); 78 Stack.pop_back(); 79 return true; 80 } 81 template <typename T, typename Callable> 82 bool traverseNode(llvm::StringRef Role, const T &Node, const Callable &Body) { 83 traverseNodePre(Role, Node); 84 Body(); 85 return traverseNodePost(); 86 } 87 88 // Range: most nodes have getSourceRange(), with a couple of exceptions. 89 // We only return it if it's valid at both ends and there are no macros. 90 91 template <typename T> std::optional<Range> getRange(const T &Node) { 92 SourceRange SR = getSourceRange(Node); 93 auto Spelled = Tokens.spelledForExpanded(Tokens.expandedTokens(SR)); 94 if (!Spelled) 95 return std::nullopt; 96 return halfOpenToRange( 97 Tokens.sourceManager(), 98 CharSourceRange::getCharRange(Spelled->front().location(), 99 Spelled->back().endLocation())); 100 } 101 template <typename T, typename = decltype(std::declval<T>().getSourceRange())> 102 SourceRange getSourceRange(const T &Node) { 103 return Node.getSourceRange(); 104 } 105 template <typename T, 106 typename = decltype(std::declval<T *>()->getSourceRange())> 107 SourceRange getSourceRange(const T *Node) { 108 return Node->getSourceRange(); 109 } 110 // TemplateName doesn't have a real Loc node type. 111 SourceRange getSourceRange(const TemplateName &Node) { return SourceRange(); } 112 // Attr just uses a weird method name. Maybe we should fix it instead? 113 SourceRange getSourceRange(const Attr *Node) { return Node->getRange(); } 114 115 // Kind is usually the class name, without the suffix ("Type" etc). 116 // Where there's a set of variants instead, we use the 'Kind' enum values. 117 118 std::string getKind(const Decl *D) { return D->getDeclKindName(); } 119 std::string getKind(const Stmt *S) { 120 std::string Result = S->getStmtClassName(); 121 if (llvm::StringRef(Result).ends_with("Stmt") || 122 llvm::StringRef(Result).ends_with("Expr")) 123 Result.resize(Result.size() - 4); 124 return Result; 125 } 126 std::string getKind(const TypeLoc &TL) { 127 std::string Result; 128 if (TL.getTypeLocClass() == TypeLoc::Qualified) 129 return "Qualified"; 130 return TL.getType()->getTypeClassName(); 131 } 132 std::string getKind(const TemplateArgumentLoc &TAL) { 133 switch (TAL.getArgument().getKind()) { 134 #define TEMPLATE_ARGUMENT_KIND(X) \ 135 case TemplateArgument::X: \ 136 return #X 137 TEMPLATE_ARGUMENT_KIND(Null); 138 TEMPLATE_ARGUMENT_KIND(NullPtr); 139 TEMPLATE_ARGUMENT_KIND(Expression); 140 TEMPLATE_ARGUMENT_KIND(Integral); 141 TEMPLATE_ARGUMENT_KIND(Pack); 142 TEMPLATE_ARGUMENT_KIND(Type); 143 TEMPLATE_ARGUMENT_KIND(Declaration); 144 TEMPLATE_ARGUMENT_KIND(Template); 145 TEMPLATE_ARGUMENT_KIND(TemplateExpansion); 146 TEMPLATE_ARGUMENT_KIND(StructuralValue); 147 #undef TEMPLATE_ARGUMENT_KIND 148 } 149 llvm_unreachable("Unhandled ArgKind enum"); 150 } 151 std::string getKind(const NestedNameSpecifierLoc &NNSL) { 152 assert(NNSL.getNestedNameSpecifier()); 153 switch (NNSL.getNestedNameSpecifier()->getKind()) { 154 #define NNS_KIND(X) \ 155 case NestedNameSpecifier::X: \ 156 return #X 157 NNS_KIND(Identifier); 158 NNS_KIND(Namespace); 159 NNS_KIND(TypeSpec); 160 NNS_KIND(TypeSpecWithTemplate); 161 NNS_KIND(Global); 162 NNS_KIND(Super); 163 NNS_KIND(NamespaceAlias); 164 #undef NNS_KIND 165 } 166 llvm_unreachable("Unhandled SpecifierKind enum"); 167 } 168 std::string getKind(const CXXCtorInitializer *CCI) { 169 if (CCI->isBaseInitializer()) 170 return "BaseInitializer"; 171 if (CCI->isDelegatingInitializer()) 172 return "DelegatingInitializer"; 173 if (CCI->isAnyMemberInitializer()) 174 return "MemberInitializer"; 175 llvm_unreachable("Unhandled CXXCtorInitializer type"); 176 } 177 std::string getKind(const TemplateName &TN) { 178 switch (TN.getKind()) { 179 #define TEMPLATE_KIND(X) \ 180 case TemplateName::X: \ 181 return #X; 182 TEMPLATE_KIND(Template); 183 TEMPLATE_KIND(OverloadedTemplate); 184 TEMPLATE_KIND(AssumedTemplate); 185 TEMPLATE_KIND(QualifiedTemplate); 186 TEMPLATE_KIND(DependentTemplate); 187 TEMPLATE_KIND(SubstTemplateTemplateParm); 188 TEMPLATE_KIND(SubstTemplateTemplateParmPack); 189 TEMPLATE_KIND(UsingTemplate); 190 TEMPLATE_KIND(DeducedTemplate); 191 #undef TEMPLATE_KIND 192 } 193 llvm_unreachable("Unhandled NameKind enum"); 194 } 195 std::string getKind(const Attr *A) { 196 switch (A->getKind()) { 197 #define ATTR(X) \ 198 case attr::X: \ 199 return #X; 200 #include "clang/Basic/AttrList.inc" 201 #undef ATTR 202 } 203 llvm_unreachable("Unhandled attr::Kind enum"); 204 } 205 std::string getKind(const CXXBaseSpecifier &CBS) { 206 // There aren't really any variants of CXXBaseSpecifier. 207 // To avoid special cases in the API/UI, use public/private as the kind. 208 return getAccessSpelling(CBS.getAccessSpecifier()).str(); 209 } 210 std::string getKind(const ConceptReference *CR) { 211 // Again there are no variants here. 212 // Kind is "Concept", role is "reference" 213 return "Concept"; 214 } 215 216 // Detail is the single most important fact about the node. 217 // Often this is the name, sometimes a "kind" enum like operators or casts. 218 // We should avoid unbounded text, like dumping parameter lists. 219 220 std::string getDetail(const Decl *D) { 221 const auto *ND = dyn_cast<NamedDecl>(D); 222 if (!ND || llvm::isa_and_nonnull<CXXConstructorDecl>(ND->getAsFunction()) || 223 isa<CXXDestructorDecl>(ND)) 224 return ""; 225 std::string Name = toString([&](raw_ostream &OS) { ND->printName(OS); }); 226 if (Name.empty()) 227 return "(anonymous)"; 228 return Name; 229 } 230 std::string getDetail(const Stmt *S) { 231 if (const auto *DRE = dyn_cast<DeclRefExpr>(S)) 232 return DRE->getNameInfo().getAsString(); 233 if (const auto *DSDRE = dyn_cast<DependentScopeDeclRefExpr>(S)) 234 return DSDRE->getNameInfo().getAsString(); 235 if (const auto *ME = dyn_cast<MemberExpr>(S)) 236 return ME->getMemberNameInfo().getAsString(); 237 if (const auto *CE = dyn_cast<CastExpr>(S)) 238 return CE->getCastKindName(); 239 if (const auto *BO = dyn_cast<BinaryOperator>(S)) 240 return BO->getOpcodeStr().str(); 241 if (const auto *UO = dyn_cast<UnaryOperator>(S)) 242 return UnaryOperator::getOpcodeStr(UO->getOpcode()).str(); 243 if (const auto *CCO = dyn_cast<CXXConstructExpr>(S)) 244 return CCO->getConstructor()->getNameAsString(); 245 if (const auto *CTE = dyn_cast<CXXThisExpr>(S)) { 246 bool Const = CTE->getType()->getPointeeType().isLocalConstQualified(); 247 if (CTE->isImplicit()) 248 return Const ? "const, implicit" : "implicit"; 249 if (Const) 250 return "const"; 251 return ""; 252 } 253 if (isa<IntegerLiteral, FloatingLiteral, FixedPointLiteral, 254 CharacterLiteral, ImaginaryLiteral, CXXBoolLiteralExpr>(S)) 255 return toString([&](raw_ostream &OS) { 256 S->printPretty(OS, nullptr, Ctx.getPrintingPolicy()); 257 }); 258 if (const auto *MTE = dyn_cast<MaterializeTemporaryExpr>(S)) 259 return MTE->isBoundToLvalueReference() ? "lvalue" : "rvalue"; 260 return ""; 261 } 262 std::string getDetail(const TypeLoc &TL) { 263 if (TL.getType().hasLocalQualifiers()) 264 return TL.getType().getLocalQualifiers().getAsString( 265 Ctx.getPrintingPolicy()); 266 if (const auto *TT = dyn_cast<TagType>(TL.getTypePtr())) 267 return getDetail(TT->getDecl()); 268 if (const auto *DT = dyn_cast<DeducedType>(TL.getTypePtr())) 269 if (DT->isDeduced()) 270 return DT->getDeducedType().getAsString(Ctx.getPrintingPolicy()); 271 if (const auto *BT = dyn_cast<BuiltinType>(TL.getTypePtr())) 272 return BT->getName(Ctx.getPrintingPolicy()).str(); 273 if (const auto *TTPT = dyn_cast<TemplateTypeParmType>(TL.getTypePtr())) 274 return getDetail(TTPT->getDecl()); 275 if (const auto *TT = dyn_cast<TypedefType>(TL.getTypePtr())) 276 return getDetail(TT->getDecl()); 277 return ""; 278 } 279 std::string getDetail(const NestedNameSpecifierLoc &NNSL) { 280 const auto &NNS = *NNSL.getNestedNameSpecifier(); 281 switch (NNS.getKind()) { 282 case NestedNameSpecifier::Identifier: 283 return NNS.getAsIdentifier()->getName().str() + "::"; 284 case NestedNameSpecifier::Namespace: 285 return NNS.getAsNamespace()->getNameAsString() + "::"; 286 case NestedNameSpecifier::NamespaceAlias: 287 return NNS.getAsNamespaceAlias()->getNameAsString() + "::"; 288 default: 289 return ""; 290 } 291 } 292 std::string getDetail(const CXXCtorInitializer *CCI) { 293 if (FieldDecl *FD = CCI->getAnyMember()) 294 return getDetail(FD); 295 if (TypeLoc TL = CCI->getBaseClassLoc()) 296 return getDetail(TL); 297 return ""; 298 } 299 std::string getDetail(const TemplateArgumentLoc &TAL) { 300 if (TAL.getArgument().getKind() == TemplateArgument::Integral) 301 return toString(TAL.getArgument().getAsIntegral(), 10); 302 return ""; 303 } 304 std::string getDetail(const TemplateName &TN) { 305 return toString([&](raw_ostream &OS) { 306 TN.print(OS, Ctx.getPrintingPolicy(), TemplateName::Qualified::None); 307 }); 308 } 309 std::string getDetail(const Attr *A) { 310 return A->getAttrName() ? A->getNormalizedFullName() : A->getSpelling(); 311 } 312 std::string getDetail(const CXXBaseSpecifier &CBS) { 313 return CBS.isVirtual() ? "virtual" : ""; 314 } 315 std::string getDetail(const ConceptReference *CR) { 316 return CR->getNamedConcept()->getNameAsString(); 317 } 318 319 /// Arcana is produced by TextNodeDumper, for the types it supports. 320 321 template <typename Dump> std::string dump(const Dump &D) { 322 return toString([&](raw_ostream &OS) { 323 TextNodeDumper Dumper(OS, Ctx, /*ShowColors=*/false); 324 D(Dumper); 325 }); 326 } 327 template <typename T> std::string getArcana(const T &N) { 328 return dump([&](TextNodeDumper &D) { D.Visit(N); }); 329 } 330 std::string getArcana(const NestedNameSpecifierLoc &NNS) { return ""; } 331 std::string getArcana(const TemplateName &NNS) { return ""; } 332 std::string getArcana(const CXXBaseSpecifier &CBS) { return ""; } 333 std::string getArcana(const TemplateArgumentLoc &TAL) { 334 return dump([&](TextNodeDumper &D) { 335 D.Visit(TAL.getArgument(), TAL.getSourceRange()); 336 }); 337 } 338 std::string getArcana(const TypeLoc &TL) { 339 return dump([&](TextNodeDumper &D) { D.Visit(TL.getType()); }); 340 } 341 342 public: 343 ASTNode Root; 344 DumpVisitor(const syntax::TokenBuffer &Tokens, const ASTContext &Ctx) 345 : Tokens(Tokens), Ctx(Ctx) {} 346 347 // Override traversal to record the nodes we care about. 348 // Generally, these are nodes with position information (TypeLoc, not Type). 349 350 bool TraverseDecl(Decl *D) { 351 return !D || isInjectedClassName(D) || 352 traverseNode("declaration", D, [&] { Base::TraverseDecl(D); }); 353 } 354 bool TraverseTypeLoc(TypeLoc TL) { 355 return !TL || traverseNode("type", TL, [&] { Base::TraverseTypeLoc(TL); }); 356 } 357 bool TraverseTemplateName(const TemplateName &TN) { 358 return traverseNode("template name", TN, 359 [&] { Base::TraverseTemplateName(TN); }); 360 } 361 bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &TAL) { 362 return traverseNode("template argument", TAL, 363 [&] { Base::TraverseTemplateArgumentLoc(TAL); }); 364 } 365 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc NNSL) { 366 return !NNSL || traverseNode("specifier", NNSL, [&] { 367 Base::TraverseNestedNameSpecifierLoc(NNSL); 368 }); 369 } 370 bool TraverseConstructorInitializer(CXXCtorInitializer *CCI) { 371 return !CCI || traverseNode("constructor initializer", CCI, [&] { 372 Base::TraverseConstructorInitializer(CCI); 373 }); 374 } 375 bool TraverseAttr(Attr *A) { 376 return !A || traverseNode("attribute", A, [&] { Base::TraverseAttr(A); }); 377 } 378 bool TraverseConceptReference(ConceptReference *C) { 379 return !C || traverseNode("reference", C, 380 [&] { Base::TraverseConceptReference(C); }); 381 } 382 bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &CBS) { 383 return traverseNode("base", CBS, 384 [&] { Base::TraverseCXXBaseSpecifier(CBS); }); 385 } 386 // Stmt is the same, but this form allows the data recursion optimization. 387 bool dataTraverseStmtPre(Stmt *S) { 388 return S && traverseNodePre(isa<Expr>(S) ? "expression" : "statement", S); 389 } 390 bool dataTraverseStmtPost(Stmt *X) { return traverseNodePost(); } 391 392 // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived 393 // TraverseTypeLoc is not called for the inner UnqualTypeLoc. 394 // This means we'd never see 'int' in 'const int'! Work around that here. 395 // (The reason for the behavior is to avoid traversing the nested Type twice, 396 // but we ignore TraverseType anyway). 397 bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QTL) { 398 return TraverseTypeLoc(QTL.getUnqualifiedLoc()); 399 } 400 // Uninteresting parts of the AST that don't have locations within them. 401 bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; } 402 bool TraverseType(QualType) { return true; } 403 404 // OpaqueValueExpr blocks traversal, we must explicitly traverse it. 405 bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) { 406 return TraverseStmt(E->getSourceExpr()); 407 } 408 // We only want to traverse the *syntactic form* to understand the selection. 409 bool TraversePseudoObjectExpr(PseudoObjectExpr *E) { 410 return TraverseStmt(E->getSyntacticForm()); 411 } 412 }; 413 414 } // namespace 415 416 ASTNode dumpAST(const DynTypedNode &N, const syntax::TokenBuffer &Tokens, 417 const ASTContext &Ctx) { 418 DumpVisitor V(Tokens, Ctx); 419 // DynTypedNode only works with const, RecursiveASTVisitor only non-const :-( 420 if (const auto *D = N.get<Decl>()) 421 V.TraverseDecl(const_cast<Decl *>(D)); 422 else if (const auto *S = N.get<Stmt>()) 423 V.TraverseStmt(const_cast<Stmt *>(S)); 424 else if (const auto *NNSL = N.get<NestedNameSpecifierLoc>()) 425 V.TraverseNestedNameSpecifierLoc( 426 *const_cast<NestedNameSpecifierLoc *>(NNSL)); 427 else if (const auto *NNS = N.get<NestedNameSpecifier>()) 428 V.TraverseNestedNameSpecifier(const_cast<NestedNameSpecifier *>(NNS)); 429 else if (const auto *TL = N.get<TypeLoc>()) 430 V.TraverseTypeLoc(*const_cast<TypeLoc *>(TL)); 431 else if (const auto *QT = N.get<QualType>()) 432 V.TraverseType(*const_cast<QualType *>(QT)); 433 else if (const auto *CCI = N.get<CXXCtorInitializer>()) 434 V.TraverseConstructorInitializer(const_cast<CXXCtorInitializer *>(CCI)); 435 else if (const auto *TAL = N.get<TemplateArgumentLoc>()) 436 V.TraverseTemplateArgumentLoc(*const_cast<TemplateArgumentLoc *>(TAL)); 437 else if (const auto *CBS = N.get<CXXBaseSpecifier>()) 438 V.TraverseCXXBaseSpecifier(*const_cast<CXXBaseSpecifier *>(CBS)); 439 else if (const auto *CR = N.get<ConceptReference>()) 440 V.TraverseConceptReference(const_cast<ConceptReference *>(CR)); 441 else 442 elog("dumpAST: unhandled DynTypedNode kind {0}", 443 N.getNodeKind().asStringRef()); 444 return std::move(V.Root); 445 } 446 447 } // namespace clangd 448 } // namespace clang 449