1 //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "clang/Tooling/Syntax/BuildTree.h" 9 #include "clang/AST/ASTFwd.h" 10 #include "clang/AST/Decl.h" 11 #include "clang/AST/DeclBase.h" 12 #include "clang/AST/DeclCXX.h" 13 #include "clang/AST/DeclarationName.h" 14 #include "clang/AST/Expr.h" 15 #include "clang/AST/ExprCXX.h" 16 #include "clang/AST/RecursiveASTVisitor.h" 17 #include "clang/AST/Stmt.h" 18 #include "clang/AST/TypeLoc.h" 19 #include "clang/AST/TypeLocVisitor.h" 20 #include "clang/Basic/LLVM.h" 21 #include "clang/Basic/SourceLocation.h" 22 #include "clang/Basic/SourceManager.h" 23 #include "clang/Basic/Specifiers.h" 24 #include "clang/Basic/TokenKinds.h" 25 #include "clang/Lex/Lexer.h" 26 #include "clang/Lex/LiteralSupport.h" 27 #include "clang/Tooling/Syntax/Nodes.h" 28 #include "clang/Tooling/Syntax/Tokens.h" 29 #include "clang/Tooling/Syntax/Tree.h" 30 #include "llvm/ADT/ArrayRef.h" 31 #include "llvm/ADT/DenseMap.h" 32 #include "llvm/ADT/PointerUnion.h" 33 #include "llvm/ADT/STLExtras.h" 34 #include "llvm/ADT/ScopeExit.h" 35 #include "llvm/ADT/SmallVector.h" 36 #include "llvm/Support/Allocator.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/Compiler.h" 39 #include "llvm/Support/FormatVariadic.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include <cstddef> 43 #include <map> 44 45 using namespace clang; 46 47 LLVM_ATTRIBUTE_UNUSED 48 static bool isImplicitExpr(Expr *E) { return E->IgnoreImplicit() != E; } 49 50 namespace { 51 /// Get start location of the Declarator from the TypeLoc. 52 /// E.g.: 53 /// loc of `(` in `int (a)` 54 /// loc of `*` in `int *(a)` 55 /// loc of the first `(` in `int (*a)(int)` 56 /// loc of the `*` in `int *(a)(int)` 57 /// loc of the first `*` in `const int *const *volatile a;` 58 /// 59 /// It is non-trivial to get the start location because TypeLocs are stored 60 /// inside out. In the example above `*volatile` is the TypeLoc returned 61 /// by `Decl.getTypeSourceInfo()`, and `*const` is what `.getPointeeLoc()` 62 /// returns. 63 struct GetStartLoc : TypeLocVisitor<GetStartLoc, SourceLocation> { 64 SourceLocation VisitParenTypeLoc(ParenTypeLoc T) { 65 auto L = Visit(T.getInnerLoc()); 66 if (L.isValid()) 67 return L; 68 return T.getLParenLoc(); 69 } 70 71 // Types spelled in the prefix part of the declarator. 72 SourceLocation VisitPointerTypeLoc(PointerTypeLoc T) { 73 return HandlePointer(T); 74 } 75 76 SourceLocation VisitMemberPointerTypeLoc(MemberPointerTypeLoc T) { 77 return HandlePointer(T); 78 } 79 80 SourceLocation VisitBlockPointerTypeLoc(BlockPointerTypeLoc T) { 81 return HandlePointer(T); 82 } 83 84 SourceLocation VisitReferenceTypeLoc(ReferenceTypeLoc T) { 85 return HandlePointer(T); 86 } 87 88 SourceLocation VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc T) { 89 return HandlePointer(T); 90 } 91 92 // All other cases are not important, as they are either part of declaration 93 // specifiers (e.g. inheritors of TypeSpecTypeLoc) or introduce modifiers on 94 // existing declarators (e.g. QualifiedTypeLoc). They cannot start the 95 // declarator themselves, but their underlying type can. 96 SourceLocation VisitTypeLoc(TypeLoc T) { 97 auto N = T.getNextTypeLoc(); 98 if (!N) 99 return SourceLocation(); 100 return Visit(N); 101 } 102 103 SourceLocation VisitFunctionProtoTypeLoc(FunctionProtoTypeLoc T) { 104 if (T.getTypePtr()->hasTrailingReturn()) 105 return SourceLocation(); // avoid recursing into the suffix of declarator. 106 return VisitTypeLoc(T); 107 } 108 109 private: 110 template <class PtrLoc> SourceLocation HandlePointer(PtrLoc T) { 111 auto L = Visit(T.getPointeeLoc()); 112 if (L.isValid()) 113 return L; 114 return T.getLocalSourceRange().getBegin(); 115 } 116 }; 117 } // namespace 118 119 static syntax::NodeKind getOperatorNodeKind(const CXXOperatorCallExpr &E) { 120 switch (E.getOperator()) { 121 // Comparison 122 case OO_EqualEqual: 123 case OO_ExclaimEqual: 124 case OO_Greater: 125 case OO_GreaterEqual: 126 case OO_Less: 127 case OO_LessEqual: 128 case OO_Spaceship: 129 // Assignment 130 case OO_Equal: 131 case OO_SlashEqual: 132 case OO_PercentEqual: 133 case OO_CaretEqual: 134 case OO_PipeEqual: 135 case OO_LessLessEqual: 136 case OO_GreaterGreaterEqual: 137 case OO_PlusEqual: 138 case OO_MinusEqual: 139 case OO_StarEqual: 140 case OO_AmpEqual: 141 // Binary computation 142 case OO_Slash: 143 case OO_Percent: 144 case OO_Caret: 145 case OO_Pipe: 146 case OO_LessLess: 147 case OO_GreaterGreater: 148 case OO_AmpAmp: 149 case OO_PipePipe: 150 case OO_ArrowStar: 151 case OO_Comma: 152 return syntax::NodeKind::BinaryOperatorExpression; 153 case OO_Tilde: 154 case OO_Exclaim: 155 return syntax::NodeKind::PrefixUnaryOperatorExpression; 156 // Prefix/Postfix increment/decrement 157 case OO_PlusPlus: 158 case OO_MinusMinus: 159 switch (E.getNumArgs()) { 160 case 1: 161 return syntax::NodeKind::PrefixUnaryOperatorExpression; 162 case 2: 163 return syntax::NodeKind::PostfixUnaryOperatorExpression; 164 default: 165 llvm_unreachable("Invalid number of arguments for operator"); 166 } 167 // Operators that can be unary or binary 168 case OO_Plus: 169 case OO_Minus: 170 case OO_Star: 171 case OO_Amp: 172 switch (E.getNumArgs()) { 173 case 1: 174 return syntax::NodeKind::PrefixUnaryOperatorExpression; 175 case 2: 176 return syntax::NodeKind::BinaryOperatorExpression; 177 default: 178 llvm_unreachable("Invalid number of arguments for operator"); 179 } 180 return syntax::NodeKind::BinaryOperatorExpression; 181 // Not yet supported by SyntaxTree 182 case OO_New: 183 case OO_Delete: 184 case OO_Array_New: 185 case OO_Array_Delete: 186 case OO_Coawait: 187 case OO_Subscript: 188 case OO_Arrow: 189 return syntax::NodeKind::UnknownExpression; 190 case OO_Call: 191 return syntax::NodeKind::CallExpression; 192 case OO_Conditional: // not overloadable 193 case NUM_OVERLOADED_OPERATORS: 194 case OO_None: 195 llvm_unreachable("Not an overloadable operator"); 196 } 197 llvm_unreachable("Unknown OverloadedOperatorKind enum"); 198 } 199 200 /// Get the start of the qualified name. In the examples below it gives the 201 /// location of the `^`: 202 /// `int ^a;` 203 /// `int ^a::S::f(){}` 204 static SourceLocation getQualifiedNameStart(NamedDecl *D) { 205 assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && 206 "only DeclaratorDecl and TypedefNameDecl are supported."); 207 208 auto DN = D->getDeclName(); 209 bool IsAnonymous = DN.isIdentifier() && !DN.getAsIdentifierInfo(); 210 if (IsAnonymous) 211 return SourceLocation(); 212 213 if (const auto *DD = dyn_cast<DeclaratorDecl>(D)) { 214 if (DD->getQualifierLoc()) { 215 return DD->getQualifierLoc().getBeginLoc(); 216 } 217 } 218 219 return D->getLocation(); 220 } 221 222 /// Gets the range of the initializer inside an init-declarator C++ [dcl.decl]. 223 /// `int a;` -> range of ``, 224 /// `int *a = nullptr` -> range of `= nullptr`. 225 /// `int a{}` -> range of `{}`. 226 /// `int a()` -> range of `()`. 227 static SourceRange getInitializerRange(Decl *D) { 228 if (auto *V = dyn_cast<VarDecl>(D)) { 229 auto *I = V->getInit(); 230 // Initializers in range-based-for are not part of the declarator 231 if (I && !V->isCXXForRangeDecl()) 232 return I->getSourceRange(); 233 } 234 235 return SourceRange(); 236 } 237 238 /// Gets the range of declarator as defined by the C++ grammar. E.g. 239 /// `int a;` -> range of `a`, 240 /// `int *a;` -> range of `*a`, 241 /// `int a[10];` -> range of `a[10]`, 242 /// `int a[1][2][3];` -> range of `a[1][2][3]`, 243 /// `int *a = nullptr` -> range of `*a = nullptr`. 244 /// `int S::f(){}` -> range of `S::f()`. 245 /// FIXME: \p Name must be a source range, e.g. for `operator+`. 246 static SourceRange getDeclaratorRange(const SourceManager &SM, TypeLoc T, 247 SourceLocation Name, 248 SourceRange Initializer) { 249 SourceLocation Start = GetStartLoc().Visit(T); 250 SourceLocation End = T.getEndLoc(); 251 assert(End.isValid()); 252 if (Name.isValid()) { 253 if (Start.isInvalid()) 254 Start = Name; 255 if (SM.isBeforeInTranslationUnit(End, Name)) 256 End = Name; 257 } 258 if (Initializer.isValid()) { 259 auto InitializerEnd = Initializer.getEnd(); 260 assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || 261 End == InitializerEnd); 262 End = InitializerEnd; 263 } 264 return SourceRange(Start, End); 265 } 266 267 namespace { 268 /// All AST hierarchy roots that can be represented as pointers. 269 using ASTPtr = llvm::PointerUnion<Stmt *, Decl *>; 270 /// Maintains a mapping from AST to syntax tree nodes. This class will get more 271 /// complicated as we support more kinds of AST nodes, e.g. TypeLocs. 272 /// FIXME: expose this as public API. 273 class ASTToSyntaxMapping { 274 public: 275 void add(ASTPtr From, syntax::Tree *To) { 276 assert(To != nullptr); 277 assert(!From.isNull()); 278 279 bool Added = Nodes.insert({From, To}).second; 280 (void)Added; 281 assert(Added && "mapping added twice"); 282 } 283 284 void add(NestedNameSpecifierLoc From, syntax::Tree *To) { 285 assert(To != nullptr); 286 assert(From.hasQualifier()); 287 288 bool Added = NNSNodes.insert({From, To}).second; 289 (void)Added; 290 assert(Added && "mapping added twice"); 291 } 292 293 syntax::Tree *find(ASTPtr P) const { return Nodes.lookup(P); } 294 295 syntax::Tree *find(NestedNameSpecifierLoc P) const { 296 return NNSNodes.lookup(P); 297 } 298 299 private: 300 llvm::DenseMap<ASTPtr, syntax::Tree *> Nodes; 301 llvm::DenseMap<NestedNameSpecifierLoc, syntax::Tree *> NNSNodes; 302 }; 303 } // namespace 304 305 /// A helper class for constructing the syntax tree while traversing a clang 306 /// AST. 307 /// 308 /// At each point of the traversal we maintain a list of pending nodes. 309 /// Initially all tokens are added as pending nodes. When processing a clang AST 310 /// node, the clients need to: 311 /// - create a corresponding syntax node, 312 /// - assign roles to all pending child nodes with 'markChild' and 313 /// 'markChildToken', 314 /// - replace the child nodes with the new syntax node in the pending list 315 /// with 'foldNode'. 316 /// 317 /// Note that all children are expected to be processed when building a node. 318 /// 319 /// Call finalize() to finish building the tree and consume the root node. 320 class syntax::TreeBuilder { 321 public: 322 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { 323 for (const auto &T : Arena.tokenBuffer().expandedTokens()) 324 LocationToToken.insert({T.location().getRawEncoding(), &T}); 325 } 326 327 llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); } 328 const SourceManager &sourceManager() const { return Arena.sourceManager(); } 329 330 /// Populate children for \p New node, assuming it covers tokens from \p 331 /// Range. 332 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, ASTPtr From) { 333 assert(New); 334 Pending.foldChildren(Arena, Range, New); 335 if (From) 336 Mapping.add(From, New); 337 } 338 339 void foldNode(ArrayRef<syntax::Token> Range, syntax::Tree *New, TypeLoc L) { 340 // FIXME: add mapping for TypeLocs 341 foldNode(Range, New, nullptr); 342 } 343 344 void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New, 345 NestedNameSpecifierLoc From) { 346 assert(New); 347 Pending.foldChildren(Arena, Range, New); 348 if (From) 349 Mapping.add(From, New); 350 } 351 352 /// Notifies that we should not consume trailing semicolon when computing 353 /// token range of \p D. 354 void noticeDeclWithoutSemicolon(Decl *D); 355 356 /// Mark the \p Child node with a corresponding \p Role. All marked children 357 /// should be consumed by foldNode. 358 /// When called on expressions (clang::Expr is derived from clang::Stmt), 359 /// wraps expressions into expression statement. 360 void markStmtChild(Stmt *Child, NodeRole Role); 361 /// Should be called for expressions in non-statement position to avoid 362 /// wrapping into expression statement. 363 void markExprChild(Expr *Child, NodeRole Role); 364 /// Set role for a token starting at \p Loc. 365 void markChildToken(SourceLocation Loc, NodeRole R); 366 /// Set role for \p T. 367 void markChildToken(const syntax::Token *T, NodeRole R); 368 369 /// Set role for \p N. 370 void markChild(syntax::Node *N, NodeRole R); 371 /// Set role for the syntax node matching \p N. 372 void markChild(ASTPtr N, NodeRole R); 373 /// Set role for the syntax node matching \p N. 374 void markChild(NestedNameSpecifierLoc N, NodeRole R); 375 376 /// Finish building the tree and consume the root node. 377 syntax::TranslationUnit *finalize() && { 378 auto Tokens = Arena.tokenBuffer().expandedTokens(); 379 assert(!Tokens.empty()); 380 assert(Tokens.back().kind() == tok::eof); 381 382 // Build the root of the tree, consuming all the children. 383 Pending.foldChildren(Arena, Tokens.drop_back(), 384 new (Arena.allocator()) syntax::TranslationUnit); 385 386 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize()); 387 TU->assertInvariantsRecursive(); 388 return TU; 389 } 390 391 /// Finds a token starting at \p L. The token must exist if \p L is valid. 392 const syntax::Token *findToken(SourceLocation L) const; 393 394 /// Finds the syntax tokens corresponding to the \p SourceRange. 395 ArrayRef<syntax::Token> getRange(SourceRange Range) const { 396 assert(Range.isValid()); 397 return getRange(Range.getBegin(), Range.getEnd()); 398 } 399 400 /// Finds the syntax tokens corresponding to the passed source locations. 401 /// \p First is the start position of the first token and \p Last is the start 402 /// position of the last token. 403 ArrayRef<syntax::Token> getRange(SourceLocation First, 404 SourceLocation Last) const { 405 assert(First.isValid()); 406 assert(Last.isValid()); 407 assert(First == Last || 408 Arena.sourceManager().isBeforeInTranslationUnit(First, Last)); 409 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); 410 } 411 412 ArrayRef<syntax::Token> 413 getTemplateRange(const ClassTemplateSpecializationDecl *D) const { 414 auto Tokens = getRange(D->getSourceRange()); 415 return maybeAppendSemicolon(Tokens, D); 416 } 417 418 /// Returns true if \p D is the last declarator in a chain and is thus 419 /// reponsible for creating SimpleDeclaration for the whole chain. 420 bool isResponsibleForCreatingDeclaration(const Decl *D) const { 421 assert((isa<DeclaratorDecl, TypedefNameDecl>(D)) && 422 "only DeclaratorDecl and TypedefNameDecl are supported."); 423 424 const Decl *Next = D->getNextDeclInContext(); 425 426 // There's no next sibling, this one is responsible. 427 if (Next == nullptr) { 428 return true; 429 } 430 431 // Next sibling is not the same type, this one is responsible. 432 if (D->getKind() != Next->getKind()) { 433 return true; 434 } 435 // Next sibling doesn't begin at the same loc, it must be a different 436 // declaration, so this declarator is responsible. 437 if (Next->getBeginLoc() != D->getBeginLoc()) { 438 return true; 439 } 440 441 // NextT is a member of the same declaration, and we need the last member to 442 // create declaration. This one is not responsible. 443 return false; 444 } 445 446 ArrayRef<syntax::Token> getDeclarationRange(Decl *D) { 447 ArrayRef<syntax::Token> Tokens; 448 // We want to drop the template parameters for specializations. 449 if (const auto *S = dyn_cast<TagDecl>(D)) 450 Tokens = getRange(S->TypeDecl::getBeginLoc(), S->getEndLoc()); 451 else 452 Tokens = getRange(D->getSourceRange()); 453 return maybeAppendSemicolon(Tokens, D); 454 } 455 456 ArrayRef<syntax::Token> getExprRange(const Expr *E) const { 457 return getRange(E->getSourceRange()); 458 } 459 460 /// Find the adjusted range for the statement, consuming the trailing 461 /// semicolon when needed. 462 ArrayRef<syntax::Token> getStmtRange(const Stmt *S) const { 463 auto Tokens = getRange(S->getSourceRange()); 464 if (isa<CompoundStmt>(S)) 465 return Tokens; 466 467 // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and 468 // all statements that end with those. Consume this semicolon here. 469 if (Tokens.back().kind() == tok::semi) 470 return Tokens; 471 return withTrailingSemicolon(Tokens); 472 } 473 474 private: 475 ArrayRef<syntax::Token> maybeAppendSemicolon(ArrayRef<syntax::Token> Tokens, 476 const Decl *D) const { 477 if (isa<NamespaceDecl>(D)) 478 return Tokens; 479 if (DeclsWithoutSemicolons.count(D)) 480 return Tokens; 481 // FIXME: do not consume trailing semicolon on function definitions. 482 // Most declarations own a semicolon in syntax trees, but not in clang AST. 483 return withTrailingSemicolon(Tokens); 484 } 485 486 ArrayRef<syntax::Token> 487 withTrailingSemicolon(ArrayRef<syntax::Token> Tokens) const { 488 assert(!Tokens.empty()); 489 assert(Tokens.back().kind() != tok::eof); 490 // We never consume 'eof', so looking at the next token is ok. 491 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) 492 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); 493 return Tokens; 494 } 495 496 void setRole(syntax::Node *N, NodeRole R) { 497 assert(N->role() == NodeRole::Detached); 498 N->setRole(R); 499 } 500 501 /// A collection of trees covering the input tokens. 502 /// When created, each tree corresponds to a single token in the file. 503 /// Clients call 'foldChildren' to attach one or more subtrees to a parent 504 /// node and update the list of trees accordingly. 505 /// 506 /// Ensures that added nodes properly nest and cover the whole token stream. 507 struct Forest { 508 Forest(syntax::Arena &A) { 509 assert(!A.tokenBuffer().expandedTokens().empty()); 510 assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); 511 // Create all leaf nodes. 512 // Note that we do not have 'eof' in the tree. 513 for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { 514 auto *L = new (A.allocator()) syntax::Leaf(&T); 515 L->Original = true; 516 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); 517 Trees.insert(Trees.end(), {&T, L}); 518 } 519 } 520 521 void assignRole(ArrayRef<syntax::Token> Range, syntax::NodeRole Role) { 522 assert(!Range.empty()); 523 auto It = Trees.lower_bound(Range.begin()); 524 assert(It != Trees.end() && "no node found"); 525 assert(It->first == Range.begin() && "no child with the specified range"); 526 assert((std::next(It) == Trees.end() || 527 std::next(It)->first == Range.end()) && 528 "no child with the specified range"); 529 assert(It->second->role() == NodeRole::Detached && 530 "re-assigning role for a child"); 531 It->second->setRole(Role); 532 } 533 534 /// Add \p Node to the forest and attach child nodes based on \p Tokens. 535 void foldChildren(const syntax::Arena &A, ArrayRef<syntax::Token> Tokens, 536 syntax::Tree *Node) { 537 // Attach children to `Node`. 538 assert(Node->firstChild() == nullptr && "node already has children"); 539 540 auto *FirstToken = Tokens.begin(); 541 auto BeginChildren = Trees.lower_bound(FirstToken); 542 543 assert((BeginChildren == Trees.end() || 544 BeginChildren->first == FirstToken) && 545 "fold crosses boundaries of existing subtrees"); 546 auto EndChildren = Trees.lower_bound(Tokens.end()); 547 assert( 548 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) && 549 "fold crosses boundaries of existing subtrees"); 550 551 // We need to go in reverse order, because we can only prepend. 552 for (auto It = EndChildren; It != BeginChildren; --It) { 553 auto *C = std::prev(It)->second; 554 if (C->role() == NodeRole::Detached) 555 C->setRole(NodeRole::Unknown); 556 Node->prependChildLowLevel(C); 557 } 558 559 // Mark that this node came from the AST and is backed by the source code. 560 Node->Original = true; 561 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); 562 563 Trees.erase(BeginChildren, EndChildren); 564 Trees.insert({FirstToken, Node}); 565 } 566 567 // EXPECTS: all tokens were consumed and are owned by a single root node. 568 syntax::Node *finalize() && { 569 assert(Trees.size() == 1); 570 auto *Root = Trees.begin()->second; 571 Trees = {}; 572 return Root; 573 } 574 575 std::string str(const syntax::Arena &A) const { 576 std::string R; 577 for (auto It = Trees.begin(); It != Trees.end(); ++It) { 578 unsigned CoveredTokens = 579 It != Trees.end() 580 ? (std::next(It)->first - It->first) 581 : A.tokenBuffer().expandedTokens().end() - It->first; 582 583 R += std::string( 584 formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->kind(), 585 It->first->text(A.sourceManager()), CoveredTokens)); 586 R += It->second->dump(A.sourceManager()); 587 } 588 return R; 589 } 590 591 private: 592 /// Maps from the start token to a subtree starting at that token. 593 /// Keys in the map are pointers into the array of expanded tokens, so 594 /// pointer order corresponds to the order of preprocessor tokens. 595 std::map<const syntax::Token *, syntax::Node *> Trees; 596 }; 597 598 /// For debugging purposes. 599 std::string str() { return Pending.str(Arena); } 600 601 syntax::Arena &Arena; 602 /// To quickly find tokens by their start location. 603 llvm::DenseMap</*SourceLocation*/ unsigned, const syntax::Token *> 604 LocationToToken; 605 Forest Pending; 606 llvm::DenseSet<Decl *> DeclsWithoutSemicolons; 607 ASTToSyntaxMapping Mapping; 608 }; 609 610 namespace { 611 class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> { 612 public: 613 explicit BuildTreeVisitor(ASTContext &Context, syntax::TreeBuilder &Builder) 614 : Builder(Builder), Context(Context) {} 615 616 bool shouldTraversePostOrder() const { return true; } 617 618 bool WalkUpFromDeclaratorDecl(DeclaratorDecl *DD) { 619 return processDeclaratorAndDeclaration(DD); 620 } 621 622 bool WalkUpFromTypedefNameDecl(TypedefNameDecl *TD) { 623 return processDeclaratorAndDeclaration(TD); 624 } 625 626 bool VisitDecl(Decl *D) { 627 assert(!D->isImplicit()); 628 Builder.foldNode(Builder.getDeclarationRange(D), 629 new (allocator()) syntax::UnknownDeclaration(), D); 630 return true; 631 } 632 633 // RAV does not call WalkUpFrom* on explicit instantiations, so we have to 634 // override Traverse. 635 // FIXME: make RAV call WalkUpFrom* instead. 636 bool 637 TraverseClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *C) { 638 if (!RecursiveASTVisitor::TraverseClassTemplateSpecializationDecl(C)) 639 return false; 640 if (C->isExplicitSpecialization()) 641 return true; // we are only interested in explicit instantiations. 642 auto *Declaration = 643 cast<syntax::SimpleDeclaration>(handleFreeStandingTagDecl(C)); 644 foldExplicitTemplateInstantiation( 645 Builder.getTemplateRange(C), Builder.findToken(C->getExternLoc()), 646 Builder.findToken(C->getTemplateKeywordLoc()), Declaration, C); 647 return true; 648 } 649 650 bool WalkUpFromTemplateDecl(TemplateDecl *S) { 651 foldTemplateDeclaration( 652 Builder.getDeclarationRange(S), 653 Builder.findToken(S->getTemplateParameters()->getTemplateLoc()), 654 Builder.getDeclarationRange(S->getTemplatedDecl()), S); 655 return true; 656 } 657 658 bool WalkUpFromTagDecl(TagDecl *C) { 659 // FIXME: build the ClassSpecifier node. 660 if (!C->isFreeStanding()) { 661 assert(C->getNumTemplateParameterLists() == 0); 662 return true; 663 } 664 handleFreeStandingTagDecl(C); 665 return true; 666 } 667 668 syntax::Declaration *handleFreeStandingTagDecl(TagDecl *C) { 669 assert(C->isFreeStanding()); 670 // Class is a declaration specifier and needs a spanning declaration node. 671 auto DeclarationRange = Builder.getDeclarationRange(C); 672 syntax::Declaration *Result = new (allocator()) syntax::SimpleDeclaration; 673 Builder.foldNode(DeclarationRange, Result, nullptr); 674 675 // Build TemplateDeclaration nodes if we had template parameters. 676 auto ConsumeTemplateParameters = [&](const TemplateParameterList &L) { 677 const auto *TemplateKW = Builder.findToken(L.getTemplateLoc()); 678 auto R = llvm::makeArrayRef(TemplateKW, DeclarationRange.end()); 679 Result = 680 foldTemplateDeclaration(R, TemplateKW, DeclarationRange, nullptr); 681 DeclarationRange = R; 682 }; 683 if (auto *S = dyn_cast<ClassTemplatePartialSpecializationDecl>(C)) 684 ConsumeTemplateParameters(*S->getTemplateParameters()); 685 for (unsigned I = C->getNumTemplateParameterLists(); 0 < I; --I) 686 ConsumeTemplateParameters(*C->getTemplateParameterList(I - 1)); 687 return Result; 688 } 689 690 bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) { 691 // We do not want to call VisitDecl(), the declaration for translation 692 // unit is built by finalize(). 693 return true; 694 } 695 696 bool WalkUpFromCompoundStmt(CompoundStmt *S) { 697 using NodeRole = syntax::NodeRole; 698 699 Builder.markChildToken(S->getLBracLoc(), NodeRole::OpenParen); 700 for (auto *Child : S->body()) 701 Builder.markStmtChild(Child, NodeRole::Statement); 702 Builder.markChildToken(S->getRBracLoc(), NodeRole::CloseParen); 703 704 Builder.foldNode(Builder.getStmtRange(S), 705 new (allocator()) syntax::CompoundStatement, S); 706 return true; 707 } 708 709 // Some statements are not yet handled by syntax trees. 710 bool WalkUpFromStmt(Stmt *S) { 711 Builder.foldNode(Builder.getStmtRange(S), 712 new (allocator()) syntax::UnknownStatement, S); 713 return true; 714 } 715 716 bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { 717 // We override to traverse range initializer as VarDecl. 718 // RAV traverses it as a statement, we produce invalid node kinds in that 719 // case. 720 // FIXME: should do this in RAV instead? 721 bool Result = [&, this]() { 722 if (S->getInit() && !TraverseStmt(S->getInit())) 723 return false; 724 if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) 725 return false; 726 if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) 727 return false; 728 if (S->getBody() && !TraverseStmt(S->getBody())) 729 return false; 730 return true; 731 }(); 732 WalkUpFromCXXForRangeStmt(S); 733 return Result; 734 } 735 736 bool TraverseStmt(Stmt *S) { 737 if (auto *DS = dyn_cast_or_null<DeclStmt>(S)) { 738 // We want to consume the semicolon, make sure SimpleDeclaration does not. 739 for (auto *D : DS->decls()) 740 Builder.noticeDeclWithoutSemicolon(D); 741 } else if (auto *E = dyn_cast_or_null<Expr>(S)) { 742 return RecursiveASTVisitor::TraverseStmt(E->IgnoreImplicit()); 743 } 744 return RecursiveASTVisitor::TraverseStmt(S); 745 } 746 747 // Some expressions are not yet handled by syntax trees. 748 bool WalkUpFromExpr(Expr *E) { 749 assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); 750 Builder.foldNode(Builder.getExprRange(E), 751 new (allocator()) syntax::UnknownExpression, E); 752 return true; 753 } 754 755 bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { 756 // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node 757 // referencing the location of the UDL suffix (`_w` in `1.2_w`). The 758 // UDL suffix location does not point to the beginning of a token, so we 759 // can't represent the UDL suffix as a separate syntax tree node. 760 761 return WalkUpFromUserDefinedLiteral(S); 762 } 763 764 syntax::UserDefinedLiteralExpression * 765 buildUserDefinedLiteral(UserDefinedLiteral *S) { 766 switch (S->getLiteralOperatorKind()) { 767 case UserDefinedLiteral::LOK_Integer: 768 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 769 case UserDefinedLiteral::LOK_Floating: 770 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 771 case UserDefinedLiteral::LOK_Character: 772 return new (allocator()) syntax::CharUserDefinedLiteralExpression; 773 case UserDefinedLiteral::LOK_String: 774 return new (allocator()) syntax::StringUserDefinedLiteralExpression; 775 case UserDefinedLiteral::LOK_Raw: 776 case UserDefinedLiteral::LOK_Template: 777 // For raw literal operator and numeric literal operator template we 778 // cannot get the type of the operand in the semantic AST. We get this 779 // information from the token. As integer and floating point have the same 780 // token kind, we run `NumericLiteralParser` again to distinguish them. 781 auto TokLoc = S->getBeginLoc(); 782 auto TokSpelling = 783 Builder.findToken(TokLoc)->text(Context.getSourceManager()); 784 auto Literal = 785 NumericLiteralParser(TokSpelling, TokLoc, Context.getSourceManager(), 786 Context.getLangOpts(), Context.getTargetInfo(), 787 Context.getDiagnostics()); 788 if (Literal.isIntegerLiteral()) 789 return new (allocator()) syntax::IntegerUserDefinedLiteralExpression; 790 else { 791 assert(Literal.isFloatingLiteral()); 792 return new (allocator()) syntax::FloatUserDefinedLiteralExpression; 793 } 794 } 795 llvm_unreachable("Unknown literal operator kind."); 796 } 797 798 bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { 799 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 800 Builder.foldNode(Builder.getExprRange(S), buildUserDefinedLiteral(S), S); 801 return true; 802 } 803 804 // FIXME: Fix `NestedNameSpecifierLoc::getLocalSourceRange` for the 805 // `DependentTemplateSpecializationType` case. 806 /// Given a nested-name-specifier return the range for the last name 807 /// specifier. 808 /// 809 /// e.g. `std::T::template X<U>::` => `template X<U>::` 810 SourceRange getLocalSourceRange(const NestedNameSpecifierLoc &NNSLoc) { 811 auto SR = NNSLoc.getLocalSourceRange(); 812 813 // The method `NestedNameSpecifierLoc::getLocalSourceRange` *should* 814 // return the desired `SourceRange`, but there is a corner case. For a 815 // `DependentTemplateSpecializationType` this method returns its 816 // qualifiers as well, in other words in the example above this method 817 // returns `T::template X<U>::` instead of only `template X<U>::` 818 if (auto TL = NNSLoc.getTypeLoc()) { 819 if (auto DependentTL = 820 TL.getAs<DependentTemplateSpecializationTypeLoc>()) { 821 // The 'template' keyword is always present in dependent template 822 // specializations. Except in the case of incorrect code 823 // TODO: Treat the case of incorrect code. 824 SR.setBegin(DependentTL.getTemplateKeywordLoc()); 825 } 826 } 827 828 return SR; 829 } 830 831 syntax::NodeKind getNameSpecifierKind(const NestedNameSpecifier &NNS) { 832 switch (NNS.getKind()) { 833 case NestedNameSpecifier::Global: 834 return syntax::NodeKind::GlobalNameSpecifier; 835 case NestedNameSpecifier::Namespace: 836 case NestedNameSpecifier::NamespaceAlias: 837 case NestedNameSpecifier::Identifier: 838 return syntax::NodeKind::IdentifierNameSpecifier; 839 case NestedNameSpecifier::TypeSpecWithTemplate: 840 return syntax::NodeKind::SimpleTemplateNameSpecifier; 841 case NestedNameSpecifier::TypeSpec: { 842 const auto *NNSType = NNS.getAsType(); 843 assert(NNSType); 844 if (isa<DecltypeType>(NNSType)) 845 return syntax::NodeKind::DecltypeNameSpecifier; 846 if (isa<TemplateSpecializationType, DependentTemplateSpecializationType>( 847 NNSType)) 848 return syntax::NodeKind::SimpleTemplateNameSpecifier; 849 return syntax::NodeKind::IdentifierNameSpecifier; 850 } 851 default: 852 // FIXME: Support Microsoft's __super 853 llvm::report_fatal_error("We don't yet support the __super specifier", 854 true); 855 } 856 } 857 858 syntax::NameSpecifier * 859 buildNameSpecifier(const NestedNameSpecifierLoc &NNSLoc) { 860 assert(NNSLoc.hasQualifier()); 861 auto NameSpecifierTokens = 862 Builder.getRange(getLocalSourceRange(NNSLoc)).drop_back(); 863 switch (getNameSpecifierKind(*NNSLoc.getNestedNameSpecifier())) { 864 case syntax::NodeKind::GlobalNameSpecifier: 865 return new (allocator()) syntax::GlobalNameSpecifier; 866 case syntax::NodeKind::IdentifierNameSpecifier: { 867 assert(NameSpecifierTokens.size() == 1); 868 Builder.markChildToken(NameSpecifierTokens.begin(), 869 syntax::NodeRole::Unknown); 870 auto *NS = new (allocator()) syntax::IdentifierNameSpecifier; 871 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 872 return NS; 873 } 874 case syntax::NodeKind::SimpleTemplateNameSpecifier: { 875 // TODO: Build `SimpleTemplateNameSpecifier` children and implement 876 // accessors to them. 877 // Be aware, we cannot do that simply by calling `TraverseTypeLoc`, 878 // some `TypeLoc`s have inside them the previous name specifier and 879 // we want to treat them independently. 880 auto *NS = new (allocator()) syntax::SimpleTemplateNameSpecifier; 881 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 882 return NS; 883 } 884 case syntax::NodeKind::DecltypeNameSpecifier: { 885 const auto TL = NNSLoc.getTypeLoc().castAs<DecltypeTypeLoc>(); 886 if (!RecursiveASTVisitor::TraverseDecltypeTypeLoc(TL)) 887 return nullptr; 888 auto *NS = new (allocator()) syntax::DecltypeNameSpecifier; 889 // TODO: Implement accessor to `DecltypeNameSpecifier` inner 890 // `DecltypeTypeLoc`. 891 // For that add mapping from `TypeLoc` to `syntax::Node*` then: 892 // Builder.markChild(TypeLoc, syntax::NodeRole); 893 Builder.foldNode(NameSpecifierTokens, NS, nullptr); 894 return NS; 895 } 896 default: 897 llvm_unreachable("getChildKind() does not return this value"); 898 } 899 } 900 901 // To build syntax tree nodes for NestedNameSpecifierLoc we override 902 // Traverse instead of WalkUpFrom because we want to traverse the children 903 // ourselves and build a list instead of a nested tree of name specifier 904 // prefixes. 905 bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc QualifierLoc) { 906 if (!QualifierLoc) 907 return true; 908 for (auto it = QualifierLoc; it; it = it.getPrefix()) { 909 auto *NS = buildNameSpecifier(it); 910 if (!NS) 911 return false; 912 Builder.markChild(NS, syntax::NodeRole::ListElement); 913 Builder.markChildToken(it.getEndLoc(), syntax::NodeRole::ListDelimiter); 914 } 915 Builder.foldNode(Builder.getRange(QualifierLoc.getSourceRange()), 916 new (allocator()) syntax::NestedNameSpecifier, 917 QualifierLoc); 918 return true; 919 } 920 921 syntax::IdExpression *buildIdExpression(NestedNameSpecifierLoc QualifierLoc, 922 SourceLocation TemplateKeywordLoc, 923 SourceRange UnqualifiedIdLoc, 924 ASTPtr From) { 925 if (QualifierLoc) { 926 Builder.markChild(QualifierLoc, syntax::NodeRole::Qualifier); 927 if (TemplateKeywordLoc.isValid()) 928 Builder.markChildToken(TemplateKeywordLoc, 929 syntax::NodeRole::TemplateKeyword); 930 } 931 932 auto *TheUnqualifiedId = new (allocator()) syntax::UnqualifiedId; 933 Builder.foldNode(Builder.getRange(UnqualifiedIdLoc), TheUnqualifiedId, 934 nullptr); 935 Builder.markChild(TheUnqualifiedId, syntax::NodeRole::UnqualifiedId); 936 937 auto IdExpressionBeginLoc = 938 QualifierLoc ? QualifierLoc.getBeginLoc() : UnqualifiedIdLoc.getBegin(); 939 940 auto *TheIdExpression = new (allocator()) syntax::IdExpression; 941 Builder.foldNode( 942 Builder.getRange(IdExpressionBeginLoc, UnqualifiedIdLoc.getEnd()), 943 TheIdExpression, From); 944 945 return TheIdExpression; 946 } 947 948 bool WalkUpFromMemberExpr(MemberExpr *S) { 949 // For `MemberExpr` with implicit `this->` we generate a simple 950 // `id-expression` syntax node, beacuse an implicit `member-expression` is 951 // syntactically undistinguishable from an `id-expression` 952 if (S->isImplicitAccess()) { 953 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 954 SourceRange(S->getMemberLoc(), S->getEndLoc()), S); 955 return true; 956 } 957 958 auto *TheIdExpression = buildIdExpression( 959 S->getQualifierLoc(), S->getTemplateKeywordLoc(), 960 SourceRange(S->getMemberLoc(), S->getEndLoc()), nullptr); 961 962 Builder.markChild(TheIdExpression, syntax::NodeRole::Member); 963 964 Builder.markExprChild(S->getBase(), syntax::NodeRole::Object); 965 Builder.markChildToken(S->getOperatorLoc(), syntax::NodeRole::AccessToken); 966 967 Builder.foldNode(Builder.getExprRange(S), 968 new (allocator()) syntax::MemberExpression, S); 969 return true; 970 } 971 972 bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { 973 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 974 SourceRange(S->getLocation(), S->getEndLoc()), S); 975 976 return true; 977 } 978 979 // Same logic as DeclRefExpr. 980 bool WalkUpFromDependentScopeDeclRefExpr(DependentScopeDeclRefExpr *S) { 981 buildIdExpression(S->getQualifierLoc(), S->getTemplateKeywordLoc(), 982 SourceRange(S->getLocation(), S->getEndLoc()), S); 983 984 return true; 985 } 986 987 bool WalkUpFromCXXThisExpr(CXXThisExpr *S) { 988 if (!S->isImplicit()) { 989 Builder.markChildToken(S->getLocation(), 990 syntax::NodeRole::IntroducerKeyword); 991 Builder.foldNode(Builder.getExprRange(S), 992 new (allocator()) syntax::ThisExpression, S); 993 } 994 return true; 995 } 996 997 bool WalkUpFromParenExpr(ParenExpr *S) { 998 Builder.markChildToken(S->getLParen(), syntax::NodeRole::OpenParen); 999 Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::SubExpression); 1000 Builder.markChildToken(S->getRParen(), syntax::NodeRole::CloseParen); 1001 Builder.foldNode(Builder.getExprRange(S), 1002 new (allocator()) syntax::ParenExpression, S); 1003 return true; 1004 } 1005 1006 bool WalkUpFromIntegerLiteral(IntegerLiteral *S) { 1007 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1008 Builder.foldNode(Builder.getExprRange(S), 1009 new (allocator()) syntax::IntegerLiteralExpression, S); 1010 return true; 1011 } 1012 1013 bool WalkUpFromCharacterLiteral(CharacterLiteral *S) { 1014 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1015 Builder.foldNode(Builder.getExprRange(S), 1016 new (allocator()) syntax::CharacterLiteralExpression, S); 1017 return true; 1018 } 1019 1020 bool WalkUpFromFloatingLiteral(FloatingLiteral *S) { 1021 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1022 Builder.foldNode(Builder.getExprRange(S), 1023 new (allocator()) syntax::FloatingLiteralExpression, S); 1024 return true; 1025 } 1026 1027 bool WalkUpFromStringLiteral(StringLiteral *S) { 1028 Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); 1029 Builder.foldNode(Builder.getExprRange(S), 1030 new (allocator()) syntax::StringLiteralExpression, S); 1031 return true; 1032 } 1033 1034 bool WalkUpFromCXXBoolLiteralExpr(CXXBoolLiteralExpr *S) { 1035 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1036 Builder.foldNode(Builder.getExprRange(S), 1037 new (allocator()) syntax::BoolLiteralExpression, S); 1038 return true; 1039 } 1040 1041 bool WalkUpFromCXXNullPtrLiteralExpr(CXXNullPtrLiteralExpr *S) { 1042 Builder.markChildToken(S->getLocation(), syntax::NodeRole::LiteralToken); 1043 Builder.foldNode(Builder.getExprRange(S), 1044 new (allocator()) syntax::CxxNullPtrExpression, S); 1045 return true; 1046 } 1047 1048 bool WalkUpFromUnaryOperator(UnaryOperator *S) { 1049 Builder.markChildToken(S->getOperatorLoc(), 1050 syntax::NodeRole::OperatorToken); 1051 Builder.markExprChild(S->getSubExpr(), syntax::NodeRole::Operand); 1052 1053 if (S->isPostfix()) 1054 Builder.foldNode(Builder.getExprRange(S), 1055 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1056 S); 1057 else 1058 Builder.foldNode(Builder.getExprRange(S), 1059 new (allocator()) syntax::PrefixUnaryOperatorExpression, 1060 S); 1061 1062 return true; 1063 } 1064 1065 bool WalkUpFromBinaryOperator(BinaryOperator *S) { 1066 Builder.markExprChild(S->getLHS(), syntax::NodeRole::LeftHandSide); 1067 Builder.markChildToken(S->getOperatorLoc(), 1068 syntax::NodeRole::OperatorToken); 1069 Builder.markExprChild(S->getRHS(), syntax::NodeRole::RightHandSide); 1070 Builder.foldNode(Builder.getExprRange(S), 1071 new (allocator()) syntax::BinaryOperatorExpression, S); 1072 return true; 1073 } 1074 1075 syntax::CallArguments *buildCallArguments(CallExpr::arg_range Args) { 1076 for (const auto &Arg : Args) { 1077 Builder.markExprChild(Arg, syntax::NodeRole::ListElement); 1078 const auto *DelimiterToken = 1079 std::next(Builder.findToken(Arg->getEndLoc())); 1080 if (DelimiterToken->kind() == clang::tok::TokenKind::comma) 1081 Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); 1082 } 1083 1084 auto *Arguments = new (allocator()) syntax::CallArguments; 1085 if (!Args.empty()) 1086 Builder.foldNode(Builder.getRange((*Args.begin())->getBeginLoc(), 1087 (*(Args.end() - 1))->getEndLoc()), 1088 Arguments, nullptr); 1089 1090 return Arguments; 1091 } 1092 1093 bool WalkUpFromCallExpr(CallExpr *S) { 1094 Builder.markExprChild(S->getCallee(), syntax::NodeRole::Callee); 1095 1096 const auto *LParenToken = 1097 std::next(Builder.findToken(S->getCallee()->getEndLoc())); 1098 // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have fixed 1099 // the test on decltype desctructors. 1100 if (LParenToken->kind() == clang::tok::l_paren) 1101 Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); 1102 1103 Builder.markChild(buildCallArguments(S->arguments()), 1104 syntax::NodeRole::Arguments); 1105 1106 Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); 1107 1108 Builder.foldNode(Builder.getRange(S->getSourceRange()), 1109 new (allocator()) syntax::CallExpression, S); 1110 return true; 1111 } 1112 1113 bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 1114 // To construct a syntax tree of the same shape for calls to built-in and 1115 // user-defined operators, ignore the `DeclRefExpr` that refers to the 1116 // operator and treat it as a simple token. Do that by traversing 1117 // arguments instead of children. 1118 for (auto *child : S->arguments()) { 1119 // A postfix unary operator is declared as taking two operands. The 1120 // second operand is used to distinguish from its prefix counterpart. In 1121 // the semantic AST this "phantom" operand is represented as a 1122 // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this 1123 // operand because it does not correspond to anything written in source 1124 // code. 1125 if (child->getSourceRange().isInvalid()) { 1126 assert(getOperatorNodeKind(*S) == 1127 syntax::NodeKind::PostfixUnaryOperatorExpression); 1128 continue; 1129 } 1130 if (!TraverseStmt(child)) 1131 return false; 1132 } 1133 return WalkUpFromCXXOperatorCallExpr(S); 1134 } 1135 1136 bool WalkUpFromCXXOperatorCallExpr(CXXOperatorCallExpr *S) { 1137 switch (getOperatorNodeKind(*S)) { 1138 case syntax::NodeKind::BinaryOperatorExpression: 1139 Builder.markExprChild(S->getArg(0), syntax::NodeRole::LeftHandSide); 1140 Builder.markChildToken(S->getOperatorLoc(), 1141 syntax::NodeRole::OperatorToken); 1142 Builder.markExprChild(S->getArg(1), syntax::NodeRole::RightHandSide); 1143 Builder.foldNode(Builder.getExprRange(S), 1144 new (allocator()) syntax::BinaryOperatorExpression, S); 1145 return true; 1146 case syntax::NodeKind::PrefixUnaryOperatorExpression: 1147 Builder.markChildToken(S->getOperatorLoc(), 1148 syntax::NodeRole::OperatorToken); 1149 Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand); 1150 Builder.foldNode(Builder.getExprRange(S), 1151 new (allocator()) syntax::PrefixUnaryOperatorExpression, 1152 S); 1153 return true; 1154 case syntax::NodeKind::PostfixUnaryOperatorExpression: 1155 Builder.markChildToken(S->getOperatorLoc(), 1156 syntax::NodeRole::OperatorToken); 1157 Builder.markExprChild(S->getArg(0), syntax::NodeRole::Operand); 1158 Builder.foldNode(Builder.getExprRange(S), 1159 new (allocator()) syntax::PostfixUnaryOperatorExpression, 1160 S); 1161 return true; 1162 case syntax::NodeKind::CallExpression: { 1163 Builder.markExprChild(S->getArg(0), syntax::NodeRole::Callee); 1164 1165 const auto *LParenToken = 1166 std::next(Builder.findToken(S->getArg(0)->getEndLoc())); 1167 // FIXME: Assert that `LParenToken` is indeed a `l_paren` once we have 1168 // fixed the test on decltype desctructors. 1169 if (LParenToken->kind() == clang::tok::l_paren) 1170 Builder.markChildToken(LParenToken, syntax::NodeRole::OpenParen); 1171 1172 Builder.markChild(buildCallArguments(CallExpr::arg_range( 1173 S->arg_begin() + 1, S->arg_end())), 1174 syntax::NodeRole::Arguments); 1175 1176 Builder.markChildToken(S->getRParenLoc(), syntax::NodeRole::CloseParen); 1177 1178 Builder.foldNode(Builder.getRange(S->getSourceRange()), 1179 new (allocator()) syntax::CallExpression, S); 1180 return true; 1181 } 1182 case syntax::NodeKind::UnknownExpression: 1183 return WalkUpFromExpr(S); 1184 default: 1185 llvm_unreachable("getOperatorNodeKind() does not return this value"); 1186 } 1187 } 1188 1189 bool WalkUpFromNamespaceDecl(NamespaceDecl *S) { 1190 auto Tokens = Builder.getDeclarationRange(S); 1191 if (Tokens.front().kind() == tok::coloncolon) { 1192 // Handle nested namespace definitions. Those start at '::' token, e.g. 1193 // namespace a^::b {} 1194 // FIXME: build corresponding nodes for the name of this namespace. 1195 return true; 1196 } 1197 Builder.foldNode(Tokens, new (allocator()) syntax::NamespaceDefinition, S); 1198 return true; 1199 } 1200 1201 // FIXME: Deleting the `TraverseParenTypeLoc` override doesn't change test 1202 // results. Find test coverage or remove it. 1203 bool TraverseParenTypeLoc(ParenTypeLoc L) { 1204 // We reverse order of traversal to get the proper syntax structure. 1205 if (!WalkUpFromParenTypeLoc(L)) 1206 return false; 1207 return TraverseTypeLoc(L.getInnerLoc()); 1208 } 1209 1210 bool WalkUpFromParenTypeLoc(ParenTypeLoc L) { 1211 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1212 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1213 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getRParenLoc()), 1214 new (allocator()) syntax::ParenDeclarator, L); 1215 return true; 1216 } 1217 1218 // Declarator chunks, they are produced by type locs and some clang::Decls. 1219 bool WalkUpFromArrayTypeLoc(ArrayTypeLoc L) { 1220 Builder.markChildToken(L.getLBracketLoc(), syntax::NodeRole::OpenParen); 1221 Builder.markExprChild(L.getSizeExpr(), syntax::NodeRole::Size); 1222 Builder.markChildToken(L.getRBracketLoc(), syntax::NodeRole::CloseParen); 1223 Builder.foldNode(Builder.getRange(L.getLBracketLoc(), L.getRBracketLoc()), 1224 new (allocator()) syntax::ArraySubscript, L); 1225 return true; 1226 } 1227 1228 syntax::ParameterDeclarationList * 1229 buildParameterDeclarationList(ArrayRef<ParmVarDecl *> Params) { 1230 for (auto *P : Params) { 1231 Builder.markChild(P, syntax::NodeRole::ListElement); 1232 const auto *DelimiterToken = std::next(Builder.findToken(P->getEndLoc())); 1233 if (DelimiterToken->kind() == clang::tok::TokenKind::comma) 1234 Builder.markChildToken(DelimiterToken, syntax::NodeRole::ListDelimiter); 1235 } 1236 auto *Parameters = new (allocator()) syntax::ParameterDeclarationList; 1237 if (!Params.empty()) 1238 Builder.foldNode(Builder.getRange(Params.front()->getBeginLoc(), 1239 Params.back()->getEndLoc()), 1240 Parameters, nullptr); 1241 return Parameters; 1242 } 1243 1244 bool WalkUpFromFunctionTypeLoc(FunctionTypeLoc L) { 1245 Builder.markChildToken(L.getLParenLoc(), syntax::NodeRole::OpenParen); 1246 1247 Builder.markChild(buildParameterDeclarationList(L.getParams()), 1248 syntax::NodeRole::Parameters); 1249 1250 Builder.markChildToken(L.getRParenLoc(), syntax::NodeRole::CloseParen); 1251 Builder.foldNode(Builder.getRange(L.getLParenLoc(), L.getEndLoc()), 1252 new (allocator()) syntax::ParametersAndQualifiers, L); 1253 return true; 1254 } 1255 1256 bool WalkUpFromFunctionProtoTypeLoc(FunctionProtoTypeLoc L) { 1257 if (!L.getTypePtr()->hasTrailingReturn()) 1258 return WalkUpFromFunctionTypeLoc(L); 1259 1260 auto *TrailingReturnTokens = buildTrailingReturn(L); 1261 // Finish building the node for parameters. 1262 Builder.markChild(TrailingReturnTokens, syntax::NodeRole::TrailingReturn); 1263 return WalkUpFromFunctionTypeLoc(L); 1264 } 1265 1266 bool TraverseMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1267 // In the source code "void (Y::*mp)()" `MemberPointerTypeLoc` corresponds 1268 // to "Y::*" but it points to a `ParenTypeLoc` that corresponds to 1269 // "(Y::*mp)" We thus reverse the order of traversal to get the proper 1270 // syntax structure. 1271 if (!WalkUpFromMemberPointerTypeLoc(L)) 1272 return false; 1273 return TraverseTypeLoc(L.getPointeeLoc()); 1274 } 1275 1276 bool WalkUpFromMemberPointerTypeLoc(MemberPointerTypeLoc L) { 1277 auto SR = L.getLocalSourceRange(); 1278 Builder.foldNode(Builder.getRange(SR), 1279 new (allocator()) syntax::MemberPointer, L); 1280 return true; 1281 } 1282 1283 // The code below is very regular, it could even be generated with some 1284 // preprocessor magic. We merely assign roles to the corresponding children 1285 // and fold resulting nodes. 1286 bool WalkUpFromDeclStmt(DeclStmt *S) { 1287 Builder.foldNode(Builder.getStmtRange(S), 1288 new (allocator()) syntax::DeclarationStatement, S); 1289 return true; 1290 } 1291 1292 bool WalkUpFromNullStmt(NullStmt *S) { 1293 Builder.foldNode(Builder.getStmtRange(S), 1294 new (allocator()) syntax::EmptyStatement, S); 1295 return true; 1296 } 1297 1298 bool WalkUpFromSwitchStmt(SwitchStmt *S) { 1299 Builder.markChildToken(S->getSwitchLoc(), 1300 syntax::NodeRole::IntroducerKeyword); 1301 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1302 Builder.foldNode(Builder.getStmtRange(S), 1303 new (allocator()) syntax::SwitchStatement, S); 1304 return true; 1305 } 1306 1307 bool WalkUpFromCaseStmt(CaseStmt *S) { 1308 Builder.markChildToken(S->getKeywordLoc(), 1309 syntax::NodeRole::IntroducerKeyword); 1310 Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseValue); 1311 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1312 Builder.foldNode(Builder.getStmtRange(S), 1313 new (allocator()) syntax::CaseStatement, S); 1314 return true; 1315 } 1316 1317 bool WalkUpFromDefaultStmt(DefaultStmt *S) { 1318 Builder.markChildToken(S->getKeywordLoc(), 1319 syntax::NodeRole::IntroducerKeyword); 1320 Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); 1321 Builder.foldNode(Builder.getStmtRange(S), 1322 new (allocator()) syntax::DefaultStatement, S); 1323 return true; 1324 } 1325 1326 bool WalkUpFromIfStmt(IfStmt *S) { 1327 Builder.markChildToken(S->getIfLoc(), syntax::NodeRole::IntroducerKeyword); 1328 Builder.markStmtChild(S->getThen(), syntax::NodeRole::ThenStatement); 1329 Builder.markChildToken(S->getElseLoc(), syntax::NodeRole::ElseKeyword); 1330 Builder.markStmtChild(S->getElse(), syntax::NodeRole::ElseStatement); 1331 Builder.foldNode(Builder.getStmtRange(S), 1332 new (allocator()) syntax::IfStatement, S); 1333 return true; 1334 } 1335 1336 bool WalkUpFromForStmt(ForStmt *S) { 1337 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1338 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1339 Builder.foldNode(Builder.getStmtRange(S), 1340 new (allocator()) syntax::ForStatement, S); 1341 return true; 1342 } 1343 1344 bool WalkUpFromWhileStmt(WhileStmt *S) { 1345 Builder.markChildToken(S->getWhileLoc(), 1346 syntax::NodeRole::IntroducerKeyword); 1347 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1348 Builder.foldNode(Builder.getStmtRange(S), 1349 new (allocator()) syntax::WhileStatement, S); 1350 return true; 1351 } 1352 1353 bool WalkUpFromContinueStmt(ContinueStmt *S) { 1354 Builder.markChildToken(S->getContinueLoc(), 1355 syntax::NodeRole::IntroducerKeyword); 1356 Builder.foldNode(Builder.getStmtRange(S), 1357 new (allocator()) syntax::ContinueStatement, S); 1358 return true; 1359 } 1360 1361 bool WalkUpFromBreakStmt(BreakStmt *S) { 1362 Builder.markChildToken(S->getBreakLoc(), 1363 syntax::NodeRole::IntroducerKeyword); 1364 Builder.foldNode(Builder.getStmtRange(S), 1365 new (allocator()) syntax::BreakStatement, S); 1366 return true; 1367 } 1368 1369 bool WalkUpFromReturnStmt(ReturnStmt *S) { 1370 Builder.markChildToken(S->getReturnLoc(), 1371 syntax::NodeRole::IntroducerKeyword); 1372 Builder.markExprChild(S->getRetValue(), syntax::NodeRole::ReturnValue); 1373 Builder.foldNode(Builder.getStmtRange(S), 1374 new (allocator()) syntax::ReturnStatement, S); 1375 return true; 1376 } 1377 1378 bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { 1379 Builder.markChildToken(S->getForLoc(), syntax::NodeRole::IntroducerKeyword); 1380 Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); 1381 Builder.foldNode(Builder.getStmtRange(S), 1382 new (allocator()) syntax::RangeBasedForStatement, S); 1383 return true; 1384 } 1385 1386 bool WalkUpFromEmptyDecl(EmptyDecl *S) { 1387 Builder.foldNode(Builder.getDeclarationRange(S), 1388 new (allocator()) syntax::EmptyDeclaration, S); 1389 return true; 1390 } 1391 1392 bool WalkUpFromStaticAssertDecl(StaticAssertDecl *S) { 1393 Builder.markExprChild(S->getAssertExpr(), syntax::NodeRole::Condition); 1394 Builder.markExprChild(S->getMessage(), syntax::NodeRole::Message); 1395 Builder.foldNode(Builder.getDeclarationRange(S), 1396 new (allocator()) syntax::StaticAssertDeclaration, S); 1397 return true; 1398 } 1399 1400 bool WalkUpFromLinkageSpecDecl(LinkageSpecDecl *S) { 1401 Builder.foldNode(Builder.getDeclarationRange(S), 1402 new (allocator()) syntax::LinkageSpecificationDeclaration, 1403 S); 1404 return true; 1405 } 1406 1407 bool WalkUpFromNamespaceAliasDecl(NamespaceAliasDecl *S) { 1408 Builder.foldNode(Builder.getDeclarationRange(S), 1409 new (allocator()) syntax::NamespaceAliasDefinition, S); 1410 return true; 1411 } 1412 1413 bool WalkUpFromUsingDirectiveDecl(UsingDirectiveDecl *S) { 1414 Builder.foldNode(Builder.getDeclarationRange(S), 1415 new (allocator()) syntax::UsingNamespaceDirective, S); 1416 return true; 1417 } 1418 1419 bool WalkUpFromUsingDecl(UsingDecl *S) { 1420 Builder.foldNode(Builder.getDeclarationRange(S), 1421 new (allocator()) syntax::UsingDeclaration, S); 1422 return true; 1423 } 1424 1425 bool WalkUpFromUnresolvedUsingValueDecl(UnresolvedUsingValueDecl *S) { 1426 Builder.foldNode(Builder.getDeclarationRange(S), 1427 new (allocator()) syntax::UsingDeclaration, S); 1428 return true; 1429 } 1430 1431 bool WalkUpFromUnresolvedUsingTypenameDecl(UnresolvedUsingTypenameDecl *S) { 1432 Builder.foldNode(Builder.getDeclarationRange(S), 1433 new (allocator()) syntax::UsingDeclaration, S); 1434 return true; 1435 } 1436 1437 bool WalkUpFromTypeAliasDecl(TypeAliasDecl *S) { 1438 Builder.foldNode(Builder.getDeclarationRange(S), 1439 new (allocator()) syntax::TypeAliasDeclaration, S); 1440 return true; 1441 } 1442 1443 private: 1444 /// Folds SimpleDeclarator node (if present) and in case this is the last 1445 /// declarator in the chain it also folds SimpleDeclaration node. 1446 template <class T> bool processDeclaratorAndDeclaration(T *D) { 1447 auto Range = getDeclaratorRange( 1448 Builder.sourceManager(), D->getTypeSourceInfo()->getTypeLoc(), 1449 getQualifiedNameStart(D), getInitializerRange(D)); 1450 1451 // There doesn't have to be a declarator (e.g. `void foo(int)` only has 1452 // declaration, but no declarator). 1453 if (Range.getBegin().isValid()) { 1454 auto *N = new (allocator()) syntax::SimpleDeclarator; 1455 Builder.foldNode(Builder.getRange(Range), N, nullptr); 1456 Builder.markChild(N, syntax::NodeRole::Declarator); 1457 } 1458 1459 if (Builder.isResponsibleForCreatingDeclaration(D)) { 1460 Builder.foldNode(Builder.getDeclarationRange(D), 1461 new (allocator()) syntax::SimpleDeclaration, D); 1462 } 1463 return true; 1464 } 1465 1466 /// Returns the range of the built node. 1467 syntax::TrailingReturnType *buildTrailingReturn(FunctionProtoTypeLoc L) { 1468 assert(L.getTypePtr()->hasTrailingReturn()); 1469 1470 auto ReturnedType = L.getReturnLoc(); 1471 // Build node for the declarator, if any. 1472 auto ReturnDeclaratorRange = SourceRange(GetStartLoc().Visit(ReturnedType), 1473 ReturnedType.getEndLoc()); 1474 syntax::SimpleDeclarator *ReturnDeclarator = nullptr; 1475 if (ReturnDeclaratorRange.isValid()) { 1476 ReturnDeclarator = new (allocator()) syntax::SimpleDeclarator; 1477 Builder.foldNode(Builder.getRange(ReturnDeclaratorRange), 1478 ReturnDeclarator, nullptr); 1479 } 1480 1481 // Build node for trailing return type. 1482 auto Return = Builder.getRange(ReturnedType.getSourceRange()); 1483 const auto *Arrow = Return.begin() - 1; 1484 assert(Arrow->kind() == tok::arrow); 1485 auto Tokens = llvm::makeArrayRef(Arrow, Return.end()); 1486 Builder.markChildToken(Arrow, syntax::NodeRole::ArrowToken); 1487 if (ReturnDeclarator) 1488 Builder.markChild(ReturnDeclarator, syntax::NodeRole::Declarator); 1489 auto *R = new (allocator()) syntax::TrailingReturnType; 1490 Builder.foldNode(Tokens, R, L); 1491 return R; 1492 } 1493 1494 void foldExplicitTemplateInstantiation( 1495 ArrayRef<syntax::Token> Range, const syntax::Token *ExternKW, 1496 const syntax::Token *TemplateKW, 1497 syntax::SimpleDeclaration *InnerDeclaration, Decl *From) { 1498 assert(!ExternKW || ExternKW->kind() == tok::kw_extern); 1499 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1500 Builder.markChildToken(ExternKW, syntax::NodeRole::ExternKeyword); 1501 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1502 Builder.markChild(InnerDeclaration, syntax::NodeRole::Declaration); 1503 Builder.foldNode( 1504 Range, new (allocator()) syntax::ExplicitTemplateInstantiation, From); 1505 } 1506 1507 syntax::TemplateDeclaration *foldTemplateDeclaration( 1508 ArrayRef<syntax::Token> Range, const syntax::Token *TemplateKW, 1509 ArrayRef<syntax::Token> TemplatedDeclaration, Decl *From) { 1510 assert(TemplateKW && TemplateKW->kind() == tok::kw_template); 1511 Builder.markChildToken(TemplateKW, syntax::NodeRole::IntroducerKeyword); 1512 1513 auto *N = new (allocator()) syntax::TemplateDeclaration; 1514 Builder.foldNode(Range, N, From); 1515 Builder.markChild(N, syntax::NodeRole::Declaration); 1516 return N; 1517 } 1518 1519 /// A small helper to save some typing. 1520 llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } 1521 1522 syntax::TreeBuilder &Builder; 1523 const ASTContext &Context; 1524 }; 1525 } // namespace 1526 1527 void syntax::TreeBuilder::noticeDeclWithoutSemicolon(Decl *D) { 1528 DeclsWithoutSemicolons.insert(D); 1529 } 1530 1531 void syntax::TreeBuilder::markChildToken(SourceLocation Loc, NodeRole Role) { 1532 if (Loc.isInvalid()) 1533 return; 1534 Pending.assignRole(*findToken(Loc), Role); 1535 } 1536 1537 void syntax::TreeBuilder::markChildToken(const syntax::Token *T, NodeRole R) { 1538 if (!T) 1539 return; 1540 Pending.assignRole(*T, R); 1541 } 1542 1543 void syntax::TreeBuilder::markChild(syntax::Node *N, NodeRole R) { 1544 assert(N); 1545 setRole(N, R); 1546 } 1547 1548 void syntax::TreeBuilder::markChild(ASTPtr N, NodeRole R) { 1549 auto *SN = Mapping.find(N); 1550 assert(SN != nullptr); 1551 setRole(SN, R); 1552 } 1553 void syntax::TreeBuilder::markChild(NestedNameSpecifierLoc NNSLoc, NodeRole R) { 1554 auto *SN = Mapping.find(NNSLoc); 1555 assert(SN != nullptr); 1556 setRole(SN, R); 1557 } 1558 1559 void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { 1560 if (!Child) 1561 return; 1562 1563 syntax::Tree *ChildNode; 1564 if (Expr *ChildExpr = dyn_cast<Expr>(Child)) { 1565 // This is an expression in a statement position, consume the trailing 1566 // semicolon and form an 'ExpressionStatement' node. 1567 markExprChild(ChildExpr, NodeRole::Expression); 1568 ChildNode = new (allocator()) syntax::ExpressionStatement; 1569 // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 1570 Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); 1571 } else { 1572 ChildNode = Mapping.find(Child); 1573 } 1574 assert(ChildNode != nullptr); 1575 setRole(ChildNode, Role); 1576 } 1577 1578 void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { 1579 if (!Child) 1580 return; 1581 Child = Child->IgnoreImplicit(); 1582 1583 syntax::Tree *ChildNode = Mapping.find(Child); 1584 assert(ChildNode != nullptr); 1585 setRole(ChildNode, Role); 1586 } 1587 1588 const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { 1589 if (L.isInvalid()) 1590 return nullptr; 1591 auto It = LocationToToken.find(L.getRawEncoding()); 1592 assert(It != LocationToToken.end()); 1593 return It->second; 1594 } 1595 1596 syntax::TranslationUnit * 1597 syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) { 1598 TreeBuilder Builder(A); 1599 BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); 1600 return std::move(Builder).finalize(); 1601 } 1602