1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/AST/Stmt.h" 18 #include "clang/AST/TypeLoc.h" 19 #include "clang/AST/TypeLocVisitor.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/Specifiers.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Lex/LiteralSupport.h" 27 #include "clang/Tooling/Syntax/Nodes.h" 28 #include "clang/Tooling/Syntax/Tokens.h" 29 #include "clang/Tooling/Syntax/Tree.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/DenseMap.h" 32 #include "llvm/ADT/PointerUnion.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/ScopeExit.h" 35 #include "llvm/ADT/SmallVector.h" 36 #include "llvm/Support/Allocator.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/Compiler.h" 39 #include "llvm/Support/FormatVariadic.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <cstddef> 43 #include <map> 44 45 using namespace clang; 46 47 LLVM_ATTRIBUTE_UNUSED 48 static bool isImplicitExpr(Expr *E) { return E->IgnoreImplicit() != E; } 49 50 namespace { 51 /// Get start location of the Declarator from the TypeLoc. 52 /// E.g.: 53 /// loc of `(` in `int (a)` 54 /// loc of `*` in `int *(a)` 55 /// loc of the first `(` in `int (*a)(int)` 56 /// loc of the `*` in `int *(a)(int)` 57 /// loc of the first `*` in `const int *const *volatile a;` 58 /// 59 /// It is non-trivial to get the start location because TypeLocs are stored 60 /// inside out. In the example above `*volatile` is the TypeLoc returned 61 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 62 /// returns. 63 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 64 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 65 auto L = Visit(T.getInnerLoc()); 66 if (L.isValid()) 67 return L; 68 return T.getLParenLoc(); 69 } 70 71 // Types spelled in the prefix part of the declarator. 72 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 73 return HandlePointer(T); 74 } 75 76 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 77 return HandlePointer(T); 78 } 79 80 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 81 return HandlePointer(T); 82 } 83 84 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 85 return HandlePointer(T); 86 } 87 88 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 89 return HandlePointer(T); 90 } 91 92 // All other cases are not important, as they are either part of declaration 93 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 94 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 95 // declarator themselves, but their underlying type can. 96 SourceLocation VisitTypeLoc(TypeLoc T) { 97 auto N = T.getNextTypeLoc(); 98 if (!N) 99 return SourceLocation(); 100 return Visit(N); 101 } 102 103 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 104 if (T.getTypePtr()->hasTrailingReturn()) 105 return SourceLocation(); // avoid recursing into the suffix of declarator. 106 return VisitTypeLoc(T); 107 } 108 109 private: 110 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 111 auto L = Visit(T.getPointeeLoc()); 112 if (L.isValid()) 113 return L; 114 return T.getLocalSourceRange().getBegin(); 115 } 116 }; 117 } // namespace 118 119 static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { 120 switch (E.getOperator()) { 121 // Comparison 122 case OO_EqualEqual: 123 case OO_ExclaimEqual: 124 case OO_Greater: 125 case OO_GreaterEqual: 126 case OO_Less: 127 case OO_LessEqual: 128 case OO_Spaceship: 129 // Assignment 130 case OO_Equal: 131 case OO_SlashEqual: 132 case OO_PercentEqual: 133 case OO_CaretEqual: 134 case OO_PipeEqual: 135 case OO_LessLessEqual: 136 case OO_GreaterGreaterEqual: 137 case OO_PlusEqual: 138 case OO_MinusEqual: 139 case OO_StarEqual: 140 case OO_AmpEqual: 141 // Binary computation 142 case OO_Slash: 143 case OO_Percent: 144 case OO_Caret: 145 case OO_Pipe: 146 case OO_LessLess: 147 case OO_GreaterGreater: 148 case OO_AmpAmp: 149 case OO_PipePipe: 150 case OO_ArrowStar: 151 case OO_Comma: 152 return syntax::NodeKind::BinaryOperatorExpression; 153 case OO_Tilde: 154 case OO_Exclaim: 155 return syntax::NodeKind::PrefixUnaryOperatorExpression; 156 // Prefix/Postfix increment/decrement 157 case OO_PlusPlus: 158 case OO_MinusMinus: 159 switch (E.getNumArgs()) { 160 case 1: 161 return syntax::NodeKind::PrefixUnaryOperatorExpression; 162 case 2: 163 return syntax::NodeKind::PostfixUnaryOperatorExpression; 164 default: 165 llvm_unreachable("Invalid number of arguments for operator"); 166 } 167 // Operators that can be unary or binary 168 case OO_Plus: 169 case OO_Minus: 170 case OO_Star: 171 case OO_Amp: 172 switch (E.getNumArgs()) { 173 case 1: 174 return syntax::NodeKind::PrefixUnaryOperatorExpression; 175 case 2: 176 return syntax::NodeKind::BinaryOperatorExpression; 177 default: 178 llvm_unreachable("Invalid number of arguments for operator"); 179 } 180 return syntax::NodeKind::BinaryOperatorExpression; 181 // Not yet supported by SyntaxTree 182 case OO_New: 183 case OO_Delete: 184 case OO_Array_New: 185 case OO_Array_Delete: 186 case OO_Coawait: 187 case OO_Subscript: 188 case OO_Arrow: 189 return syntax::NodeKind::UnknownExpression; 190 case OO_Call: 191 return syntax::NodeKind::CallExpression; 192 case OO_Conditional: // not overloadable 193 case NUM_OVERLOADED_OPERATORS: 194 case OO_None: 195 llvm_unreachable("Not an overloadable operator"); 196 } 197 llvm_unreachable("Unknown OverloadedOperatorKind enum"); 198 } 199 200 /// Get the start of the qualified name. In the examples below it gives the 201 /// location of the `^`: 202 /// `int ^a;` 203 /// `int *^a;` 204 /// `int ^a::S::f(){}` 205 static SourceLocation getQualifiedNameStart(NamedDecl *D) { 206 assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && 207 "only DeclaratorDecl and TypedefNameDecl are supported."); 208 209 auto DN = D->getDeclName(); 210 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 211 if (IsAnonymous) 212 return SourceLocation(); 213 214 if (const auto *DD = dyn_cast<DeclaratorDecl>(D)) { 215 if (DD->getQualifierLoc()) { 216 return DD->getQualifierLoc().getBeginLoc(); 217 } 218 } 219 220 return D->getLocation(); 221 } 222 223 /// Gets the range of the initializer inside an init-declarator C++ [dcl.decl]. 224 /// `int a;` -> range of ``, 225 /// `int *a = nullptr` -> range of `= nullptr`. 226 /// `int a{}` -> range of `{}`. 227 /// `int a()` -> range of `()`. 228 static SourceRange getInitializerRange(Decl *D) { 229 if (auto *V = dyn_cast<VarDecl>(D)) { 230 auto *I = V->getInit(); 231 // Initializers in range-based-for are not part of the declarator 232 if (I && !V->isCXXForRangeDecl()) 233 return I->getSourceRange(); 234 } 235 236 return SourceRange(); 237 } 238 239 /// Gets the range of declarator as defined by the C++ grammar. E.g. 240 /// `int a;` -> range of `a`, 241 /// `int *a;` -> range of `*a`, 242 /// `int a[10];` -> range of `a[10]`, 243 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 244 /// `int *a = nullptr` -> range of `*a = nullptr`. 245 /// `int S::f(){}` -> range of `S::f()`. 246 /// FIXME: \p Name must be a source range. 247 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 248 SourceLocation Name, 249 SourceRange Initializer) { 250 SourceLocation Start = GetStartLoc().Visit(T); 251 SourceLocation End = T.getEndLoc(); 252 assert(End.isValid()); 253 if (Name.isValid()) { 254 if (Start.isInvalid()) 255 Start = Name; 256 if (SM.isBeforeInTranslationUnit(End, Name)) 257 End = Name; 258 } 259 if (Initializer.isValid()) { 260 auto InitializerEnd = Initializer.getEnd(); 261 assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || 262 End == InitializerEnd); 263 End = InitializerEnd; 264 } 265 return SourceRange(Start, End); 266 } 267 268 namespace { 269 /// All AST hierarchy roots that can be represented as pointers. 270 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 271 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 272 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 273 /// FIXME: expose this as public API. 274 class ASTToSyntaxMapping { 275 public: 276 void add(ASTPtr From, syntax::Tree *To) { 277 assert(To != nullptr); 278 assert(!From.isNull()); 279 280 bool Added = Nodes.insert({From, To}).second; 281 (void)Added; 282 assert(Added && "mapping added twice"); 283 } 284 285 void add(NestedNameSpecifierLoc From, syntax::Tree *To) { 286 assert(To != nullptr); 287 assert(From.hasQualifier()); 288 289 bool Added = NNSNodes.insert({From, To}).second; 290 (void)Added; 291 assert(Added && "mapping added twice"); 292 } 293 294 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 295 296 syntax::Tree *find(NestedNameSpecifierLoc P) const { 297 return NNSNodes.lookup(P); 298 } 299 300 private: 301 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 302 llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes; 303 }; 304 } // namespace 305 306 /// A helper class for constructing the syntax tree while traversing a clang 307 /// AST. 308 /// 309 /// At each point of the traversal we maintain a list of pending nodes. 310 /// Initially all tokens are added as pending nodes. When processing a clang AST 311 /// node, the clients need to: 312 /// - create a corresponding syntax node, 313 /// - assign roles to all pending child nodes with 'markChild' and 314 /// 'markChildToken', 315 /// - replace the child nodes with the new syntax node in the pending list 316 /// with 'foldNode'. 317 /// 318 /// Note that all children are expected to be processed when building a node. 319 /// 320 /// Call finalize() to finish building the tree and consume the root node. 321 class syntax::TreeBuilder { 322 public: 323 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 324 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 325 LocationToToken.insert({T.location().getRawEncoding(), &T}); 326 } 327 328 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 329 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 330 331 /// Populate children for \p New node, assuming it covers tokens from \p 332 /// Range. 333 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { 334 assert(New); 335 Pending.foldChildren(Arena, Range, New); 336 if (From) 337 Mapping.add(From, New); 338 } 339 340 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { 341 // FIXME: add mapping for TypeLocs 342 foldNode(Range, New, nullptr); 343 } 344 345 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 346 NestedNameSpecifierLoc From) { 347 assert(New); 348 Pending.foldChildren(Arena, Range, New); 349 if (From) 350 Mapping.add(From, New); 351 } 352 353 /// Notifies that we should not consume trailing semicolon when computing 354 /// token range of \p D. 355 void noticeDeclWithoutSemicolon(Decl *D); 356 357 /// Mark the \p Child node with a corresponding \p Role. All marked children 358 /// should be consumed by foldNode. 359 /// When called on expressions (clang::Expr is derived from clang::Stmt), 360 /// wraps expressions into expression statement. 361 void markStmtChild(Stmt *Child, NodeRole Role); 362 /// Should be called for expressions in non-statement position to avoid 363 /// wrapping into expression statement. 364 void markExprChild(Expr *Child, NodeRole Role); 365 /// Set role for a token starting at \p Loc. 366 void markChildToken(SourceLocation Loc, NodeRole R); 367 /// Set role for \p T. 368 void markChildToken(const syntax::Token *T, NodeRole R); 369 370 /// Set role for \p N. 371 void markChild(syntax::Node *N, NodeRole R); 372 /// Set role for the syntax node matching \p N. 373 void markChild(ASTPtr N, NodeRole R); 374 /// Set role for the syntax node matching \p N. 375 void markChild(NestedNameSpecifierLoc N, NodeRole R); 376 377 /// Finish building the tree and consume the root node. 378 syntax::TranslationUnit *finalize() && { 379 auto Tokens = Arena.tokenBuffer().expandedTokens(); 380 assert(!Tokens.empty()); 381 assert(Tokens.back().kind() == tok::eof); 382 383 // Build the root of the tree, consuming all the children. 384 Pending.foldChildren(Arena, Tokens.drop_back(), 385 new (Arena.allocator()) syntax::TranslationUnit); 386 387 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 388 TU->assertInvariantsRecursive(); 389 return TU; 390 } 391 392 /// Finds a token starting at \p L. The token must exist if \p L is valid. 393 const syntax::Token *findToken(SourceLocation L) const; 394 395 /// Finds the syntax tokens corresponding to the \p SourceRange. 396 ArrayRef<syntax::Token> getRange(SourceRange Range) const { 397 assert(Range.isValid()); 398 return getRange(Range.getBegin(), Range.getEnd()); 399 } 400 401 /// Finds the syntax tokens corresponding to the passed source locations. 402 /// \p First is the start position of the first token and \p Last is the start 403 /// position of the last token. 404 ArrayRef<syntax::Token> getRange(SourceLocation First, 405 SourceLocation Last) const { 406 assert(First.isValid()); 407 assert(Last.isValid()); 408 assert(First == Last || 409 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 410 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 411 } 412 413 ArrayRef<syntax::Token> 414 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 415 auto Tokens = getRange(D->getSourceRange()); 416 return maybeAppendSemicolon(Tokens, D); 417 } 418 419 /// Returns true if \p D is the last declarator in a chain and is thus 420 /// reponsible for creating SimpleDeclaration for the whole chain. 421 bool isResponsibleForCreatingDeclaration(const Decl *D) const { 422 assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && 423 "only DeclaratorDecl and TypedefNameDecl are supported."); 424 425 const Decl *Next = D->getNextDeclInContext(); 426 427 // There's no next sibling, this one is responsible. 428 if (Next == nullptr) { 429 return true; 430 } 431 432 // Next sibling is not the same type, this one is responsible. 433 if (D->getKind() != Next->getKind()) { 434 return true; 435 } 436 // Next sibling doesn't begin at the same loc, it must be a different 437 // declaration, so this declarator is responsible. 438 if (Next->getBeginLoc() != D->getBeginLoc()) { 439 return true; 440 } 441 442 // NextT is a member of the same declaration, and we need the last member to 443 // create declaration. This one is not responsible. 444 return false; 445 } 446 447 ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { 448 ArrayRef<syntax::Token> Tokens; 449 // We want to drop the template parameters for specializations. 450 if (const auto *S = dyn_cast<TagDecl>(D)) 451 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 452 else 453 Tokens = getRange(D->getSourceRange()); 454 return maybeAppendSemicolon(Tokens, D); 455 } 456 457 ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 458 return getRange(E->getSourceRange()); 459 } 460 461 /// Find the adjusted range for the statement, consuming the trailing 462 /// semicolon when needed. 463 ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 464 auto Tokens = getRange(S->getSourceRange()); 465 if (isa<CompoundStmt>(S)) 466 return Tokens; 467 468 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 469 // all statements that end with those. Consume this semicolon here. 470 if (Tokens.back().kind() == tok::semi) 471 return Tokens; 472 return withTrailingSemicolon(Tokens); 473 } 474 475 private: 476 ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, 477 const Decl *D) const { 478 if (isa<NamespaceDecl>(D)) 479 return Tokens; 480 if (DeclsWithoutSemicolons.count(D)) 481 return Tokens; 482 // FIXME: do not consume trailing semicolon on function definitions. 483 // Most declarations own a semicolon in syntax trees, but not in clang AST. 484 return withTrailingSemicolon(Tokens); 485 } 486 487 ArrayRef<syntax::Token> 488 withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { 489 assert(!Tokens.empty()); 490 assert(Tokens.back().kind() != tok::eof); 491 // We never consume 'eof', so looking at the next token is ok. 492 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 493 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 494 return Tokens; 495 } 496 497 void setRole(syntax::Node *N, NodeRole R) { 498 assert(N->role() == NodeRole::Detached); 499 N->setRole(R); 500 } 501 502 /// A collection of trees covering the input tokens. 503 /// When created, each tree corresponds to a single token in the file. 504 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 505 /// node and update the list of trees accordingly. 506 /// 507 /// Ensures that added nodes properly nest and cover the whole token stream. 508 struct Forest { 509 Forest(syntax::Arena &A) { 510 assert(!A.tokenBuffer().expandedTokens().empty()); 511 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 512 // Create all leaf nodes. 513 // Note that we do not have 'eof' in the tree. 514 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 515 auto *L = new (A.allocator()) syntax::Leaf(&T); 516 L->Original = true; 517 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 518 Trees.insert(Trees.end(), {&T, L}); 519 } 520 } 521 522 void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { 523 assert(!Range.empty()); 524 auto It = Trees.lower_bound(Range.begin()); 525 assert(It != Trees.end() && "no node found"); 526 assert(It->first == Range.begin() && "no child with the specified range"); 527 assert((std::next(It) == Trees.end() || 528 std::next(It)->first == Range.end()) && 529 "no child with the specified range"); 530 assert(It->second->role() == NodeRole::Detached && 531 "re-assigning role for a child"); 532 It->second->setRole(Role); 533 } 534 535 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 536 void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens, 537 syntax::Tree *Node) { 538 // Attach children to `Node`. 539 assert(Node->firstChild() == nullptr && "node already has children"); 540 541 auto *FirstToken = Tokens.begin(); 542 auto BeginChildren = Trees.lower_bound(FirstToken); 543 544 assert((BeginChildren == Trees.end() || 545 BeginChildren->first == FirstToken) && 546 "fold crosses boundaries of existing subtrees"); 547 auto EndChildren = Trees.lower_bound(Tokens.end()); 548 assert( 549 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 550 "fold crosses boundaries of existing subtrees"); 551 552 // We need to go in reverse order, because we can only prepend. 553 for (auto It = EndChildren; It != BeginChildren; --It) { 554 auto *C = std::prev(It)->second; 555 if (C->role() == NodeRole::Detached) 556 C->setRole(NodeRole::Unknown); 557 Node->prependChildLowLevel(C); 558 } 559 560 // Mark that this node came from the AST and is backed by the source code. 561 Node->Original = true; 562 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 563 564 Trees.erase(BeginChildren, EndChildren); 565 Trees.insert({FirstToken, Node}); 566 } 567 568 // EXPECTS: all tokens were consumed and are owned by a single root node. 569 syntax::Node *finalize() && { 570 assert(Trees.size() == 1); 571 auto *Root = Trees.begin()->second; 572 Trees = {}; 573 return Root; 574 } 575 576 std::string str(const syntax::Arena &A) const { 577 std::string R; 578 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 579 unsigned CoveredTokens = 580 It != Trees.end() 581 ? (std::next(It)->first - It->first) 582 : A.tokenBuffer().expandedTokens().end() - It->first; 583 584 R += std::string( 585 formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 586 It->first->text(A.sourceManager()), CoveredTokens)); 587 R += It->second->dump(A.sourceManager()); 588 } 589 return R; 590 } 591 592 private: 593 /// Maps from the start token to a subtree starting at that token. 594 /// Keys in the map are pointers into the array of expanded tokens, so 595 /// pointer order corresponds to the order of preprocessor tokens. 596 std::map<const syntax::Token *, syntax::Node *> Trees; 597 }; 598 599 /// For debugging purposes. 600 std::string str() { return Pending.str(Arena); } 601 602 syntax::Arena &Arena; 603 /// To quickly find tokens by their start location. 604 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 605 LocationToToken; 606 Forest Pending; 607 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 608 ASTToSyntaxMapping Mapping; 609 }; 610 611 namespace { 612 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 613 public: 614 explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) 615 : Builder(Builder), Context(Context) {} 616 617 bool shouldTraversePostOrder() const { return true; } 618 619 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 620 return processDeclaratorAndDeclaration(DD); 621 } 622 623 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { 624 return processDeclaratorAndDeclaration(TD); 625 } 626 627 bool VisitDecl(Decl *D) { 628 assert(!D->isImplicit()); 629 Builder.foldNode(Builder.getDeclarationRange(D), 630 new (allocator()) syntax::UnknownDeclaration(), D); 631 return true; 632 } 633 634 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 635 // override Traverse. 636 // FIXME: make RAV call WalkUpFrom* instead. 637 bool 638 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 639 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 640 return false; 641 if (C->isExplicitSpecialization()) 642 return true; // we are only interested in explicit instantiations. 643 auto *Declaration = 644 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 645 foldExplicitTemplateInstantiation( 646 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 647 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 648 return true; 649 } 650 651 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 652 foldTemplateDeclaration( 653 Builder.getDeclarationRange(S), 654 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 655 Builder.getDeclarationRange(S->getTemplatedDecl()), S); 656 return true; 657 } 658 659 bool WalkUpFromTagDecl(TagDecl *C) { 660 // FIXME: build the ClassSpecifier node. 661 if (!C->isFreeStanding()) { 662 assert(C->getNumTemplateParameterLists() == 0); 663 return true; 664 } 665 handleFreeStandingTagDecl(C); 666 return true; 667 } 668 669 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 670 assert(C->isFreeStanding()); 671 // Class is a declaration specifier and needs a spanning declaration node. 672 auto DeclarationRange = Builder.getDeclarationRange(C); 673 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 674 Builder.foldNode(DeclarationRange, Result, nullptr); 675 676 // Build TemplateDeclaration nodes if we had template parameters. 677 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 678 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 679 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 680 Result = 681 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 682 DeclarationRange = R; 683 }; 684 if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 685 ConsumeTemplateParameters(*S->getTemplateParameters()); 686 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 687 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 688 return Result; 689 } 690 691 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 692 // We do not want to call VisitDecl(), the declaration for translation 693 // unit is built by finalize(). 694 return true; 695 } 696 697 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 698 using NodeRole = syntax::NodeRole; 699 700 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 701 for (auto *Child : S->body()) 702 Builder.markStmtChild(Child, NodeRole::Statement); 703 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 704 705 Builder.foldNode(Builder.getStmtRange(S), 706 new (allocator()) syntax::CompoundStatement, S); 707 return true; 708 } 709 710 // Some statements are not yet handled by syntax trees. 711 bool WalkUpFromStmt(Stmt *S) { 712 Builder.foldNode(Builder.getStmtRange(S), 713 new (allocator()) syntax::UnknownStatement, S); 714 return true; 715 } 716 717 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 718 // We override to traverse range initializer as VarDecl. 719 // RAV traverses it as a statement, we produce invalid node kinds in that 720 // case. 721 // FIXME: should do this in RAV instead? 722 bool Result = [&, this]() { 723 if (S->getInit() && !TraverseStmt(S->getInit())) 724 return false; 725 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 726 return false; 727 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 728 return false; 729 if (S->getBody() && !TraverseStmt(S->getBody())) 730 return false; 731 return true; 732 }(); 733 WalkUpFromCXXForRangeStmt(S); 734 return Result; 735 } 736 737 bool TraverseStmt(Stmt *S) { 738 if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) { 739 // We want to consume the semicolon, make sure SimpleDeclaration does not. 740 for (auto *D : DS->decls()) 741 Builder.noticeDeclWithoutSemicolon(D); 742 } else if (auto *E = dyn_cast_or_null<Expr>(S)) { 743 return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); 744 } 745 return RecursiveASTVisitor::TraverseStmt(S); 746 } 747 748 // Some expressions are not yet handled by syntax trees. 749 bool WalkUpFromExpr(Expr *E) { 750 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 751 Builder.foldNode(Builder.getExprRange(E), 752 new (allocator()) syntax::UnknownExpression, E); 753 return true; 754 } 755 756 bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { 757 // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node 758 // referencing the location of the UDL suffix (`_w` in `1.2_w`). The 759 // UDL suffix location does not point to the beginning of a token, so we 760 // can't represent the UDL suffix as a separate syntax tree node. 761 762 return WalkUpFromUserDefinedLiteral(S); 763 } 764 765 syntax::UserDefinedLiteralExpression * 766 buildUserDefinedLiteral(UserDefinedLiteral *S) { 767 switch (S->getLiteralOperatorKind()) { 768 case UserDefinedLiteral::LOK_Integer: 769 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 770 case UserDefinedLiteral::LOK_Floating: 771 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 772 case UserDefinedLiteral::LOK_Character: 773 return new (allocator()) syntax::CharUserDefinedLiteralExpression; 774 case UserDefinedLiteral::LOK_String: 775 return new (allocator()) syntax::StringUserDefinedLiteralExpression; 776 case UserDefinedLiteral::LOK_Raw: 777 case UserDefinedLiteral::LOK_Template: 778 // For raw literal operator and numeric literal operator template we 779 // cannot get the type of the operand in the semantic AST. We get this 780 // information from the token. As integer and floating point have the same 781 // token kind, we run `NumericLiteralParser` again to distinguish them. 782 auto TokLoc = S->getBeginLoc(); 783 auto TokSpelling = 784 Builder.findToken(TokLoc)->text(Context.getSourceManager()); 785 auto Literal = 786 NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), 787 Context.getLangOpts(), Context.getTargetInfo(), 788 Context.getDiagnostics()); 789 if (Literal.isIntegerLiteral()) 790 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 791 else { 792 assert(Literal.isFloatingLiteral()); 793 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 794 } 795 } 796 llvm_unreachable("Unknown literal operator kind."); 797 } 798 799 bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { 800 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 801 Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); 802 return true; 803 } 804 805 // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the 806 // `DependentTemplateSpecializationType` case. 807 /// Given a nested-name-specifier return the range for the last name 808 /// specifier. 809 /// 810 /// e.g. `std::T::template X<U>::` => `template X<U>::` 811 SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) { 812 auto SR = NNSLoc.getLocalSourceRange(); 813 814 // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should* 815 // return the desired `SourceRange`, but there is a corner case. For a 816 // `DependentTemplateSpecializationType` this method returns its 817 // qualifiers as well, in other words in the example above this method 818 // returns `T::template X<U>::` instead of only `template X<U>::` 819 if (auto TL = NNSLoc.getTypeLoc()) { 820 if (auto DependentTL = 821 TL.getAs<DependentTemplateSpecializationTypeLoc>()) { 822 // The 'template' keyword is always present in dependent template 823 // specializations. Except in the case of incorrect code 824 // TODO: Treat the case of incorrect code. 825 SR.setBegin(DependentTL.getTemplateKeywordLoc()); 826 } 827 } 828 829 return SR; 830 } 831 832 syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) { 833 switch (NNS.getKind()) { 834 case NestedNameSpecifier::Global: 835 return syntax::NodeKind::GlobalNameSpecifier; 836 case NestedNameSpecifier::Namespace: 837 case NestedNameSpecifier::NamespaceAlias: 838 case NestedNameSpecifier::Identifier: 839 return syntax::NodeKind::IdentifierNameSpecifier; 840 case NestedNameSpecifier::TypeSpecWithTemplate: 841 return syntax::NodeKind::SimpleTemplateNameSpecifier; 842 case NestedNameSpecifier::TypeSpec: { 843 const auto *NNSType = NNS.getAsType(); 844 assert(NNSType); 845 if (isa<DecltypeType>(NNSType)) 846 return syntax::NodeKind::DecltypeNameSpecifier; 847 if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>( 848 NNSType)) 849 return syntax::NodeKind::SimpleTemplateNameSpecifier; 850 return syntax::NodeKind::IdentifierNameSpecifier; 851 } 852 default: 853 // FIXME: Support Microsoft's __super 854 llvm::report_fatal_error("We don't yet support the __super specifier", 855 true); 856 } 857 } 858 859 syntax::NameSpecifier * 860 buildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) { 861 assert(NNSLoc.hasQualifier()); 862 auto NameSpecifierTokens = 863 Builder.getRange(getLocalSourceRange(NNSLoc)).drop_back(); 864 switch (getNameSpecifierKind(*NNSLoc.getNestedNameSpecifier())) { 865 case syntax::NodeKind::GlobalNameSpecifier: 866 return new (allocator()) syntax::GlobalNameSpecifier; 867 case syntax::NodeKind::IdentifierNameSpecifier: { 868 assert(NameSpecifierTokens.size() == 1); 869 Builder.markChildToken(NameSpecifierTokens.begin(), 870 syntax::NodeRole::Unknown); 871 auto *NS = new (allocator()) syntax::IdentifierNameSpecifier; 872 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 873 return NS; 874 } 875 case syntax::NodeKind::SimpleTemplateNameSpecifier: { 876 // TODO: Build `SimpleTemplateNameSpecifier` children and implement 877 // accessors to them. 878 // Be aware, we cannot do that simply by calling `TraverseTypeLoc`, 879 // some `TypeLoc`s have inside them the previous name specifier and 880 // we want to treat them independently. 881 auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier; 882 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 883 return NS; 884 } 885 case syntax::NodeKind::DecltypeNameSpecifier: { 886 const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>(); 887 if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL)) 888 return nullptr; 889 auto *NS = new (allocator()) syntax::DecltypeNameSpecifier; 890 // TODO: Implement accessor to `DecltypeNameSpecifier` inner 891 // `DecltypeTypeLoc`. 892 // For that add mapping from `TypeLoc` to `syntax::Node*` then: 893 // Builder.markChild(TypeLoc, syntax::NodeRole); 894 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 895 return NS; 896 } 897 default: 898 llvm_unreachable("getChildKind() does not return this value"); 899 } 900 } 901 902 // To build syntax tree nodes for NestedNameSpecifierLoc we override 903 // Traverse instead of WalkUpFrom because we want to traverse the children 904 // ourselves and build a list instead of a nested tree of name specifier 905 // prefixes. 906 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) { 907 if (!QualifierLoc) 908 return true; 909 for (auto it = QualifierLoc; it; it = it.getPrefix()) { 910 auto *NS = buildNameSpecifier(it); 911 if (!NS) 912 return false; 913 Builder.markChild(NS, syntax::NodeRole::ListElement); 914 Builder.markChildToken(it.getEndLoc(), syntax::NodeRole::ListDelimiter); 915 } 916 Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), 917 new (allocator()) syntax::NestedNameSpecifier, 918 QualifierLoc); 919 return true; 920 } 921 922 syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc, 923 SourceLocation TemplateKeywordLoc, 924 SourceRange UnqualifiedIdLoc, 925 ASTPtr From) { 926 if (QualifierLoc) { 927 Builder.markChild(QualifierLoc, syntax::NodeRole::Qualifier); 928 if (TemplateKeywordLoc.isValid()) 929 Builder.markChildToken(TemplateKeywordLoc, 930 syntax::NodeRole::TemplateKeyword); 931 } 932 933 auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId; 934 Builder.foldNode(Builder.getRange(UnqualifiedIdLoc), TheUnqualifiedId, 935 nullptr); 936 Builder.markChild(TheUnqualifiedId, syntax::NodeRole::UnqualifiedId); 937 938 auto IdExpressionBeginLoc = 939 QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin(); 940 941 auto *TheIdExpression = new (allocator()) syntax::IdExpression; 942 Builder.foldNode( 943 Builder.getRange(IdExpressionBeginLoc, UnqualifiedIdLoc.getEnd()), 944 TheIdExpression, From); 945 946 return TheIdExpression; 947 } 948 949 bool WalkUpFromMemberExpr(MemberExpr *S) { 950 // For `MemberExpr` with implicit `this->` we generate a simple 951 // `id-expression` syntax node, beacuse an implicit `member-expression` is 952 // syntactically undistinguishable from an `id-expression` 953 if (S->isImplicitAccess()) { 954 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 955 SourceRange(S->getMemberLoc(), S->getEndLoc()), S); 956 return true; 957 } 958 959 auto *TheIdExpression = buildIdExpression( 960 S->getQualifierLoc(), S->getTemplateKeywordLoc(), 961 SourceRange(S->getMemberLoc(), S->getEndLoc()), nullptr); 962 963 Builder.markChild(TheIdExpression, syntax::NodeRole::Member); 964 965 Builder.markExprChild(S->getBase(), syntax::NodeRole::Object); 966 Builder.markChildToken(S->getOperatorLoc(), syntax::NodeRole::AccessToken); 967 968 Builder.foldNode(Builder.getExprRange(S), 969 new (allocator()) syntax::MemberExpression, S); 970 return true; 971 } 972 973 bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { 974 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 975 SourceRange(S->getLocation(), S->getEndLoc()), S); 976 977 return true; 978 } 979 980 // Same logic as DeclRefExpr. 981 bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) { 982 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 983 SourceRange(S->getLocation(), S->getEndLoc()), S); 984 985 return true; 986 } 987 988 bool WalkUpFromCXXThisExpr(CXXThisExpr *S) { 989 if (!S->isImplicit()) { 990 Builder.markChildToken(S->getLocation(), 991 syntax::NodeRole::IntroducerKeyword); 992 Builder.foldNode(Builder.getExprRange(S), 993 new (allocator()) syntax::ThisExpression, S); 994 } 995 return true; 996 } 997 998 bool WalkUpFromParenExpr(ParenExpr *S) { 999 Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); 1000 Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::SubExpression); 1001 Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); 1002 Builder.foldNode(Builder.getExprRange(S), 1003 new (allocator()) syntax::ParenExpression, S); 1004 return true; 1005 } 1006 1007 bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { 1008 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1009 Builder.foldNode(Builder.getExprRange(S), 1010 new (allocator()) syntax::IntegerLiteralExpression, S); 1011 return true; 1012 } 1013 1014 bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { 1015 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1016 Builder.foldNode(Builder.getExprRange(S), 1017 new (allocator()) syntax::CharacterLiteralExpression, S); 1018 return true; 1019 } 1020 1021 bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { 1022 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1023 Builder.foldNode(Builder.getExprRange(S), 1024 new (allocator()) syntax::FloatingLiteralExpression, S); 1025 return true; 1026 } 1027 1028 bool WalkUpFromStringLiteral(StringLiteral *S) { 1029 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 1030 Builder.foldNode(Builder.getExprRange(S), 1031 new (allocator()) syntax::StringLiteralExpression, S); 1032 return true; 1033 } 1034 1035 bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { 1036 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1037 Builder.foldNode(Builder.getExprRange(S), 1038 new (allocator()) syntax::BoolLiteralExpression, S); 1039 return true; 1040 } 1041 1042 bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { 1043 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1044 Builder.foldNode(Builder.getExprRange(S), 1045 new (allocator()) syntax::CxxNullPtrExpression, S); 1046 return true; 1047 } 1048 1049 bool WalkUpFromUnaryOperator(UnaryOperator *S) { 1050 Builder.markChildToken(S->getOperatorLoc(), 1051 syntax::NodeRole::OperatorToken); 1052 Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::Operand); 1053 1054 if (S->isPostfix()) 1055 Builder.foldNode(Builder.getExprRange(S), 1056 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1057 S); 1058 else 1059 Builder.foldNode(Builder.getExprRange(S), 1060 new (allocator()) syntax::PrefixUnaryOperatorExpression, 1061 S); 1062 1063 return true; 1064 } 1065 1066 bool WalkUpFromBinaryOperator(BinaryOperator *S) { 1067 Builder.markExprChild(S->getLHS(), syntax::NodeRole::LeftHandSide); 1068 Builder.markChildToken(S->getOperatorLoc(), 1069 syntax::NodeRole::OperatorToken); 1070 Builder.markExprChild(S->getRHS(), syntax::NodeRole::RightHandSide); 1071 Builder.foldNode(Builder.getExprRange(S), 1072 new (allocator()) syntax::BinaryOperatorExpression, S); 1073 return true; 1074 } 1075 1076 syntax::CallArguments *buildCallArguments(CallExpr::arg_range Args) { 1077 for (const auto &Arg : Args) { 1078 Builder.markExprChild(Arg, syntax::NodeRole::ListElement); 1079 const auto *DelimiterToken = 1080 std::next(Builder.findToken(Arg->getEndLoc())); 1081 if (DelimiterToken->kind() == clang::tok::TokenKind::comma) 1082 Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); 1083 } 1084 1085 auto *Arguments = new (allocator()) syntax::CallArguments; 1086 if (!Args.empty()) 1087 Builder.foldNode(Builder.getRange((*Args.begin())->getBeginLoc(), 1088 (*(Args.end() - 1))->getEndLoc()), 1089 Arguments, nullptr); 1090 1091 return Arguments; 1092 } 1093 1094 bool WalkUpFromCallExpr(CallExpr *S) { 1095 Builder.markExprChild(S->getCallee(), syntax::NodeRole::Callee); 1096 1097 const auto *LParenToken = 1098 std::next(Builder.findToken(S->getCallee()->getEndLoc())); 1099 // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have fixed 1100 // the test on decltype desctructors. 1101 if (LParenToken->kind() == clang::tok::l_paren) 1102 Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); 1103 1104 Builder.markChild(buildCallArguments(S->arguments()), 1105 syntax::NodeRole::Arguments); 1106 1107 Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); 1108 1109 Builder.foldNode(Builder.getRange(S->getSourceRange()), 1110 new (allocator()) syntax::CallExpression, S); 1111 return true; 1112 } 1113 1114 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 1115 // To construct a syntax tree of the same shape for calls to built-in and 1116 // user-defined operators, ignore the `DeclRefExpr` that refers to the 1117 // operator and treat it as a simple token. Do that by traversing 1118 // arguments instead of children. 1119 for (auto *child : S->arguments()) { 1120 // A postfix unary operator is declared as taking two operands. The 1121 // second operand is used to distinguish from its prefix counterpart. In 1122 // the semantic AST this "phantom" operand is represented as a 1123 // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this 1124 // operand because it does not correspond to anything written in source 1125 // code. 1126 if (child->getSourceRange().isInvalid()) { 1127 assert(getOperatorNodeKind(*S) == 1128 syntax::NodeKind::PostfixUnaryOperatorExpression); 1129 continue; 1130 } 1131 if (!TraverseStmt(child)) 1132 return false; 1133 } 1134 return WalkUpFromCXXOperatorCallExpr(S); 1135 } 1136 1137 bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 1138 switch (getOperatorNodeKind(*S)) { 1139 case syntax::NodeKind::BinaryOperatorExpression: 1140 Builder.markExprChild(S->getArg(0), syntax::NodeRole::LeftHandSide); 1141 Builder.markChildToken(S->getOperatorLoc(), 1142 syntax::NodeRole::OperatorToken); 1143 Builder.markExprChild(S->getArg(1), syntax::NodeRole::RightHandSide); 1144 Builder.foldNode(Builder.getExprRange(S), 1145 new (allocator()) syntax::BinaryOperatorExpression, S); 1146 return true; 1147 case syntax::NodeKind::PrefixUnaryOperatorExpression: 1148 Builder.markChildToken(S->getOperatorLoc(), 1149 syntax::NodeRole::OperatorToken); 1150 Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand); 1151 Builder.foldNode(Builder.getExprRange(S), 1152 new (allocator()) syntax::PrefixUnaryOperatorExpression, 1153 S); 1154 return true; 1155 case syntax::NodeKind::PostfixUnaryOperatorExpression: 1156 Builder.markChildToken(S->getOperatorLoc(), 1157 syntax::NodeRole::OperatorToken); 1158 Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand); 1159 Builder.foldNode(Builder.getExprRange(S), 1160 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1161 S); 1162 return true; 1163 case syntax::NodeKind::CallExpression: { 1164 Builder.markExprChild(S->getArg(0), syntax::NodeRole::Callee); 1165 1166 const auto *LParenToken = 1167 std::next(Builder.findToken(S->getArg(0)->getEndLoc())); 1168 // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have 1169 // fixed the test on decltype desctructors. 1170 if (LParenToken->kind() == clang::tok::l_paren) 1171 Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); 1172 1173 Builder.markChild(buildCallArguments(CallExpr::arg_range( 1174 S->arg_begin() + 1, S->arg_end())), 1175 syntax::NodeRole::Arguments); 1176 1177 Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); 1178 1179 Builder.foldNode(Builder.getRange(S->getSourceRange()), 1180 new (allocator()) syntax::CallExpression, S); 1181 return true; 1182 } 1183 case syntax::NodeKind::UnknownExpression: 1184 return WalkUpFromExpr(S); 1185 default: 1186 llvm_unreachable("getOperatorNodeKind() does not return this value"); 1187 } 1188 } 1189 1190 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 1191 auto Tokens = Builder.getDeclarationRange(S); 1192 if (Tokens.front().kind() == tok::coloncolon) { 1193 // Handle nested namespace definitions. Those start at '::' token, e.g. 1194 // namespace a^::b {} 1195 // FIXME: build corresponding nodes for the name of this namespace. 1196 return true; 1197 } 1198 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 1199 return true; 1200 } 1201 1202 // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test 1203 // results. Find test coverage or remove it. 1204 bool TraverseParenTypeLoc(ParenTypeLoc L) { 1205 // We reverse order of traversal to get the proper syntax structure. 1206 if (!WalkUpFromParenTypeLoc(L)) 1207 return false; 1208 return TraverseTypeLoc(L.getInnerLoc()); 1209 } 1210 1211 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 1212 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1213 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1214 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 1215 new (allocator()) syntax::ParenDeclarator, L); 1216 return true; 1217 } 1218 1219 // Declarator chunks, they are produced by type locs and some clang::Decls. 1220 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 1221 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 1222 Builder.markExprChild(L.getSizeExpr(), syntax::NodeRole::Size); 1223 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 1224 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 1225 new (allocator()) syntax::ArraySubscript, L); 1226 return true; 1227 } 1228 1229 syntax::ParameterDeclarationList * 1230 buildParameterDeclarationList(ArrayRef<ParmVarDecl *> Params) { 1231 for (auto *P : Params) { 1232 Builder.markChild(P, syntax::NodeRole::ListElement); 1233 const auto *DelimiterToken = std::next(Builder.findToken(P->getEndLoc())); 1234 if (DelimiterToken->kind() == clang::tok::TokenKind::comma) 1235 Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); 1236 } 1237 auto *Parameters = new (allocator()) syntax::ParameterDeclarationList; 1238 if (!Params.empty()) 1239 Builder.foldNode(Builder.getRange(Params.front()->getBeginLoc(), 1240 Params.back()->getEndLoc()), 1241 Parameters, nullptr); 1242 return Parameters; 1243 } 1244 1245 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 1246 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1247 1248 Builder.markChild(buildParameterDeclarationList(L.getParams()), 1249 syntax::NodeRole::Parameters); 1250 1251 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1252 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 1253 new (allocator()) syntax::ParametersAndQualifiers, L); 1254 return true; 1255 } 1256 1257 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 1258 if (!L.getTypePtr()->hasTrailingReturn()) 1259 return WalkUpFromFunctionTypeLoc(L); 1260 1261 auto *TrailingReturnTokens = buildTrailingReturn(L); 1262 // Finish building the node for parameters. 1263 Builder.markChild(TrailingReturnTokens, syntax::NodeRole::TrailingReturn); 1264 return WalkUpFromFunctionTypeLoc(L); 1265 } 1266 1267 bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1268 // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds 1269 // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to 1270 // "(Y::*mp)" We thus reverse the order of traversal to get the proper 1271 // syntax structure. 1272 if (!WalkUpFromMemberPointerTypeLoc(L)) 1273 return false; 1274 return TraverseTypeLoc(L.getPointeeLoc()); 1275 } 1276 1277 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1278 auto SR = L.getLocalSourceRange(); 1279 Builder.foldNode(Builder.getRange(SR), 1280 new (allocator()) syntax::MemberPointer, L); 1281 return true; 1282 } 1283 1284 // The code below is very regular, it could even be generated with some 1285 // preprocessor magic. We merely assign roles to the corresponding children 1286 // and fold resulting nodes. 1287 bool WalkUpFromDeclStmt(DeclStmt *S) { 1288 Builder.foldNode(Builder.getStmtRange(S), 1289 new (allocator()) syntax::DeclarationStatement, S); 1290 return true; 1291 } 1292 1293 bool WalkUpFromNullStmt(NullStmt *S) { 1294 Builder.foldNode(Builder.getStmtRange(S), 1295 new (allocator()) syntax::EmptyStatement, S); 1296 return true; 1297 } 1298 1299 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 1300 Builder.markChildToken(S->getSwitchLoc(), 1301 syntax::NodeRole::IntroducerKeyword); 1302 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1303 Builder.foldNode(Builder.getStmtRange(S), 1304 new (allocator()) syntax::SwitchStatement, S); 1305 return true; 1306 } 1307 1308 bool WalkUpFromCaseStmt(CaseStmt *S) { 1309 Builder.markChildToken(S->getKeywordLoc(), 1310 syntax::NodeRole::IntroducerKeyword); 1311 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseValue); 1312 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1313 Builder.foldNode(Builder.getStmtRange(S), 1314 new (allocator()) syntax::CaseStatement, S); 1315 return true; 1316 } 1317 1318 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 1319 Builder.markChildToken(S->getKeywordLoc(), 1320 syntax::NodeRole::IntroducerKeyword); 1321 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1322 Builder.foldNode(Builder.getStmtRange(S), 1323 new (allocator()) syntax::DefaultStatement, S); 1324 return true; 1325 } 1326 1327 bool WalkUpFromIfStmt(IfStmt *S) { 1328 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 1329 Builder.markStmtChild(S->getThen(), syntax::NodeRole::ThenStatement); 1330 Builder.markChildToken(S->getElseLoc(), syntax::NodeRole::ElseKeyword); 1331 Builder.markStmtChild(S->getElse(), syntax::NodeRole::ElseStatement); 1332 Builder.foldNode(Builder.getStmtRange(S), 1333 new (allocator()) syntax::IfStatement, S); 1334 return true; 1335 } 1336 1337 bool WalkUpFromForStmt(ForStmt *S) { 1338 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1339 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1340 Builder.foldNode(Builder.getStmtRange(S), 1341 new (allocator()) syntax::ForStatement, S); 1342 return true; 1343 } 1344 1345 bool WalkUpFromWhileStmt(WhileStmt *S) { 1346 Builder.markChildToken(S->getWhileLoc(), 1347 syntax::NodeRole::IntroducerKeyword); 1348 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1349 Builder.foldNode(Builder.getStmtRange(S), 1350 new (allocator()) syntax::WhileStatement, S); 1351 return true; 1352 } 1353 1354 bool WalkUpFromContinueStmt(ContinueStmt *S) { 1355 Builder.markChildToken(S->getContinueLoc(), 1356 syntax::NodeRole::IntroducerKeyword); 1357 Builder.foldNode(Builder.getStmtRange(S), 1358 new (allocator()) syntax::ContinueStatement, S); 1359 return true; 1360 } 1361 1362 bool WalkUpFromBreakStmt(BreakStmt *S) { 1363 Builder.markChildToken(S->getBreakLoc(), 1364 syntax::NodeRole::IntroducerKeyword); 1365 Builder.foldNode(Builder.getStmtRange(S), 1366 new (allocator()) syntax::BreakStatement, S); 1367 return true; 1368 } 1369 1370 bool WalkUpFromReturnStmt(ReturnStmt *S) { 1371 Builder.markChildToken(S->getReturnLoc(), 1372 syntax::NodeRole::IntroducerKeyword); 1373 Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnValue); 1374 Builder.foldNode(Builder.getStmtRange(S), 1375 new (allocator()) syntax::ReturnStatement, S); 1376 return true; 1377 } 1378 1379 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 1380 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1381 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1382 Builder.foldNode(Builder.getStmtRange(S), 1383 new (allocator()) syntax::RangeBasedForStatement, S); 1384 return true; 1385 } 1386 1387 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 1388 Builder.foldNode(Builder.getDeclarationRange(S), 1389 new (allocator()) syntax::EmptyDeclaration, S); 1390 return true; 1391 } 1392 1393 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 1394 Builder.markExprChild(S->getAssertExpr(), syntax::NodeRole::Condition); 1395 Builder.markExprChild(S->getMessage(), syntax::NodeRole::Message); 1396 Builder.foldNode(Builder.getDeclarationRange(S), 1397 new (allocator()) syntax::StaticAssertDeclaration, S); 1398 return true; 1399 } 1400 1401 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 1402 Builder.foldNode(Builder.getDeclarationRange(S), 1403 new (allocator()) syntax::LinkageSpecificationDeclaration, 1404 S); 1405 return true; 1406 } 1407 1408 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 1409 Builder.foldNode(Builder.getDeclarationRange(S), 1410 new (allocator()) syntax::NamespaceAliasDefinition, S); 1411 return true; 1412 } 1413 1414 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 1415 Builder.foldNode(Builder.getDeclarationRange(S), 1416 new (allocator()) syntax::UsingNamespaceDirective, S); 1417 return true; 1418 } 1419 1420 bool WalkUpFromUsingDecl(UsingDecl *S) { 1421 Builder.foldNode(Builder.getDeclarationRange(S), 1422 new (allocator()) syntax::UsingDeclaration, S); 1423 return true; 1424 } 1425 1426 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 1427 Builder.foldNode(Builder.getDeclarationRange(S), 1428 new (allocator()) syntax::UsingDeclaration, S); 1429 return true; 1430 } 1431 1432 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 1433 Builder.foldNode(Builder.getDeclarationRange(S), 1434 new (allocator()) syntax::UsingDeclaration, S); 1435 return true; 1436 } 1437 1438 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 1439 Builder.foldNode(Builder.getDeclarationRange(S), 1440 new (allocator()) syntax::TypeAliasDeclaration, S); 1441 return true; 1442 } 1443 1444 private: 1445 /// Folds SimpleDeclarator node (if present) and in case this is the last 1446 /// declarator in the chain it also folds SimpleDeclaration node. 1447 template <class T> bool processDeclaratorAndDeclaration(T *D) { 1448 auto Range = getDeclaratorRange( 1449 Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(), 1450 getQualifiedNameStart(D), getInitializerRange(D)); 1451 1452 // There doesn't have to be a declarator (e.g. `void foo(int)` only has 1453 // declaration, but no declarator). 1454 if (Range.getBegin().isValid()) { 1455 auto *N = new (allocator()) syntax::SimpleDeclarator; 1456 Builder.foldNode(Builder.getRange(Range), N, nullptr); 1457 Builder.markChild(N, syntax::NodeRole::Declarator); 1458 } 1459 1460 if (Builder.isResponsibleForCreatingDeclaration(D)) { 1461 Builder.foldNode(Builder.getDeclarationRange(D), 1462 new (allocator()) syntax::SimpleDeclaration, D); 1463 } 1464 return true; 1465 } 1466 1467 /// Returns the range of the built node. 1468 syntax::TrailingReturnType *buildTrailingReturn(FunctionProtoTypeLoc L) { 1469 assert(L.getTypePtr()->hasTrailingReturn()); 1470 1471 auto ReturnedType = L.getReturnLoc(); 1472 // Build node for the declarator, if any. 1473 auto ReturnDeclaratorRange = SourceRange(GetStartLoc().Visit(ReturnedType), 1474 ReturnedType.getEndLoc()); 1475 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 1476 if (ReturnDeclaratorRange.isValid()) { 1477 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 1478 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 1479 ReturnDeclarator, nullptr); 1480 } 1481 1482 // Build node for trailing return type. 1483 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 1484 const auto *Arrow = Return.begin() - 1; 1485 assert(Arrow->kind() == tok::arrow); 1486 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 1487 Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); 1488 if (ReturnDeclarator) 1489 Builder.markChild(ReturnDeclarator, syntax::NodeRole::Declarator); 1490 auto *R = new (allocator()) syntax::TrailingReturnType; 1491 Builder.foldNode(Tokens, R, L); 1492 return R; 1493 } 1494 1495 void foldExplicitTemplateInstantiation( 1496 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 1497 const syntax::Token *TemplateKW, 1498 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 1499 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 1500 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1501 Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); 1502 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1503 Builder.markChild(InnerDeclaration, syntax::NodeRole::Declaration); 1504 Builder.foldNode( 1505 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 1506 } 1507 1508 syntax::TemplateDeclaration *foldTemplateDeclaration( 1509 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 1510 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 1511 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1512 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1513 1514 auto *N = new (allocator()) syntax::TemplateDeclaration; 1515 Builder.foldNode(Range, N, From); 1516 Builder.markChild(N, syntax::NodeRole::Declaration); 1517 return N; 1518 } 1519 1520 /// A small helper to save some typing. 1521 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 1522 1523 syntax::TreeBuilder &Builder; 1524 const ASTContext &Context; 1525 }; 1526 } // namespace 1527 1528 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 1529 DeclsWithoutSemicolons.insert(D); 1530 } 1531 1532 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 1533 if (Loc.isInvalid()) 1534 return; 1535 Pending.assignRole(*findToken(Loc), Role); 1536 } 1537 1538 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 1539 if (!T) 1540 return; 1541 Pending.assignRole(*T, R); 1542 } 1543 1544 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 1545 assert(N); 1546 setRole(N, R); 1547 } 1548 1549 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 1550 auto *SN = Mapping.find(N); 1551 assert(SN != nullptr); 1552 setRole(SN, R); 1553 } 1554 void syntax::TreeBuilder::markChild(NestedNameSpecifierLoc NNSLoc, NodeRole R) { 1555 auto *SN = Mapping.find(NNSLoc); 1556 assert(SN != nullptr); 1557 setRole(SN, R); 1558 } 1559 1560 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 1561 if (!Child) 1562 return; 1563 1564 syntax::Tree *ChildNode; 1565 if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { 1566 // This is an expression in a statement position, consume the trailing 1567 // semicolon and form an 'ExpressionStatement' node. 1568 markExprChild(ChildExpr, NodeRole::Expression); 1569 ChildNode = new (allocator()) syntax::ExpressionStatement; 1570 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1571 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1572 } else { 1573 ChildNode = Mapping.find(Child); 1574 } 1575 assert(ChildNode != nullptr); 1576 setRole(ChildNode, Role); 1577 } 1578 1579 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1580 if (!Child) 1581 return; 1582 Child = Child->IgnoreImplicit(); 1583 1584 syntax::Tree *ChildNode = Mapping.find(Child); 1585 assert(ChildNode != nullptr); 1586 setRole(ChildNode, Role); 1587 } 1588 1589 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1590 if (L.isInvalid()) 1591 return nullptr; 1592 auto It = LocationToToken.find(L.getRawEncoding()); 1593 assert(It != LocationToToken.end()); 1594 return It->second; 1595 } 1596 1597 syntax::TranslationUnit * 1598 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1599 TreeBuilder Builder(A); 1600 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1601 return std::move(Builder).finalize(); 1602 } 1603