1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #include <algorithm> 22 23 #define DEBUG_TYPE "format-parser" 24 25 namespace clang { 26 namespace format { 27 28 class FormatTokenSource { 29 public: 30 virtual ~FormatTokenSource() {} 31 32 // Returns the next token in the token stream. 33 virtual FormatToken *getNextToken() = 0; 34 35 // Returns the token precedint the token returned by the last call to 36 // getNextToken() in the token stream, or nullptr if no such token exists. 37 virtual FormatToken *getPreviousToken() = 0; 38 39 // Returns the token that would be returned by the next call to 40 // getNextToken(). 41 virtual FormatToken *peekNextToken() = 0; 42 43 // Returns whether we are at the end of the file. 44 // This can be different from whether getNextToken() returned an eof token 45 // when the FormatTokenSource is a view on a part of the token stream. 46 virtual bool isEOF() = 0; 47 48 // Gets the current position in the token stream, to be used by setPosition(). 49 virtual unsigned getPosition() = 0; 50 51 // Resets the token stream to the state it was in when getPosition() returned 52 // Position, and return the token at that position in the stream. 53 virtual FormatToken *setPosition(unsigned Position) = 0; 54 }; 55 56 namespace { 57 58 class ScopedDeclarationState { 59 public: 60 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 61 bool MustBeDeclaration) 62 : Line(Line), Stack(Stack) { 63 Line.MustBeDeclaration = MustBeDeclaration; 64 Stack.push_back(MustBeDeclaration); 65 } 66 ~ScopedDeclarationState() { 67 Stack.pop_back(); 68 if (!Stack.empty()) 69 Line.MustBeDeclaration = Stack.back(); 70 else 71 Line.MustBeDeclaration = true; 72 } 73 74 private: 75 UnwrappedLine &Line; 76 std::vector<bool> &Stack; 77 }; 78 79 static bool isLineComment(const FormatToken &FormatTok) { 80 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); 81 } 82 83 // Checks if \p FormatTok is a line comment that continues the line comment 84 // \p Previous. The original column of \p MinColumnToken is used to determine 85 // whether \p FormatTok is indented enough to the right to continue \p Previous. 86 static bool continuesLineComment(const FormatToken &FormatTok, 87 const FormatToken *Previous, 88 const FormatToken *MinColumnToken) { 89 if (!Previous || !MinColumnToken) 90 return false; 91 unsigned MinContinueColumn = 92 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 93 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 94 isLineComment(*Previous) && 95 FormatTok.OriginalColumn >= MinContinueColumn; 96 } 97 98 class ScopedMacroState : public FormatTokenSource { 99 public: 100 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 101 FormatToken *&ResetToken) 102 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 103 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 104 Token(nullptr), PreviousToken(nullptr) { 105 FakeEOF.Tok.startToken(); 106 FakeEOF.Tok.setKind(tok::eof); 107 TokenSource = this; 108 Line.Level = 0; 109 Line.InPPDirective = true; 110 } 111 112 ~ScopedMacroState() override { 113 TokenSource = PreviousTokenSource; 114 ResetToken = Token; 115 Line.InPPDirective = false; 116 Line.Level = PreviousLineLevel; 117 } 118 119 FormatToken *getNextToken() override { 120 // The \c UnwrappedLineParser guards against this by never calling 121 // \c getNextToken() after it has encountered the first eof token. 122 assert(!eof()); 123 PreviousToken = Token; 124 Token = PreviousTokenSource->getNextToken(); 125 if (eof()) 126 return &FakeEOF; 127 return Token; 128 } 129 130 FormatToken *getPreviousToken() override { 131 return PreviousTokenSource->getPreviousToken(); 132 } 133 134 FormatToken *peekNextToken() override { 135 if (eof()) 136 return &FakeEOF; 137 return PreviousTokenSource->peekNextToken(); 138 } 139 140 bool isEOF() override { return PreviousTokenSource->isEOF(); } 141 142 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 143 144 FormatToken *setPosition(unsigned Position) override { 145 PreviousToken = nullptr; 146 Token = PreviousTokenSource->setPosition(Position); 147 return Token; 148 } 149 150 private: 151 bool eof() { 152 return Token && Token->HasUnescapedNewline && 153 !continuesLineComment(*Token, PreviousToken, 154 /*MinColumnToken=*/PreviousToken); 155 } 156 157 FormatToken FakeEOF; 158 UnwrappedLine &Line; 159 FormatTokenSource *&TokenSource; 160 FormatToken *&ResetToken; 161 unsigned PreviousLineLevel; 162 FormatTokenSource *PreviousTokenSource; 163 164 FormatToken *Token; 165 FormatToken *PreviousToken; 166 }; 167 168 } // end anonymous namespace 169 170 class ScopedLineState { 171 public: 172 ScopedLineState(UnwrappedLineParser &Parser, 173 bool SwitchToPreprocessorLines = false) 174 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 175 if (SwitchToPreprocessorLines) 176 Parser.CurrentLines = &Parser.PreprocessorDirectives; 177 else if (!Parser.Line->Tokens.empty()) 178 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 179 PreBlockLine = std::move(Parser.Line); 180 Parser.Line = std::make_unique<UnwrappedLine>(); 181 Parser.Line->Level = PreBlockLine->Level; 182 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 183 } 184 185 ~ScopedLineState() { 186 if (!Parser.Line->Tokens.empty()) { 187 Parser.addUnwrappedLine(); 188 } 189 assert(Parser.Line->Tokens.empty()); 190 Parser.Line = std::move(PreBlockLine); 191 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 192 Parser.MustBreakBeforeNextToken = true; 193 Parser.CurrentLines = OriginalLines; 194 } 195 196 private: 197 UnwrappedLineParser &Parser; 198 199 std::unique_ptr<UnwrappedLine> PreBlockLine; 200 SmallVectorImpl<UnwrappedLine> *OriginalLines; 201 }; 202 203 class CompoundStatementIndenter { 204 public: 205 CompoundStatementIndenter(UnwrappedLineParser *Parser, 206 const FormatStyle &Style, unsigned &LineLevel) 207 : CompoundStatementIndenter(Parser, LineLevel, 208 Style.BraceWrapping.AfterControlStatement, 209 Style.BraceWrapping.IndentBraces) {} 210 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 211 bool WrapBrace, bool IndentBrace) 212 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 213 if (WrapBrace) 214 Parser->addUnwrappedLine(); 215 if (IndentBrace) 216 ++LineLevel; 217 } 218 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 219 220 private: 221 unsigned &LineLevel; 222 unsigned OldLineLevel; 223 }; 224 225 namespace { 226 227 class IndexedTokenSource : public FormatTokenSource { 228 public: 229 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 230 : Tokens(Tokens), Position(-1) {} 231 232 FormatToken *getNextToken() override { 233 if (Position >= 0 && Tokens[Position]->is(tok::eof)) { 234 LLVM_DEBUG({ 235 llvm::dbgs() << "Next "; 236 dbgToken(Position); 237 }); 238 return Tokens[Position]; 239 } 240 ++Position; 241 LLVM_DEBUG({ 242 llvm::dbgs() << "Next "; 243 dbgToken(Position); 244 }); 245 return Tokens[Position]; 246 } 247 248 FormatToken *getPreviousToken() override { 249 assert(Position > 0); 250 return Tokens[Position - 1]; 251 } 252 253 FormatToken *peekNextToken() override { 254 int Next = Position + 1; 255 LLVM_DEBUG({ 256 llvm::dbgs() << "Peeking "; 257 dbgToken(Next); 258 }); 259 return Tokens[Next]; 260 } 261 262 bool isEOF() override { return Tokens[Position]->is(tok::eof); } 263 264 unsigned getPosition() override { 265 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n"); 266 assert(Position >= 0); 267 return Position; 268 } 269 270 FormatToken *setPosition(unsigned P) override { 271 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n"); 272 Position = P; 273 return Tokens[Position]; 274 } 275 276 void reset() { Position = -1; } 277 278 private: 279 void dbgToken(int Position, llvm::StringRef Indent = "") { 280 FormatToken *Tok = Tokens[Position]; 281 llvm::dbgs() << Indent << "[" << Position 282 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText 283 << ", Macro: " << !!Tok->MacroCtx << "\n"; 284 } 285 286 ArrayRef<FormatToken *> Tokens; 287 int Position; 288 }; 289 290 } // end anonymous namespace 291 292 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 293 const AdditionalKeywords &Keywords, 294 unsigned FirstStartColumn, 295 ArrayRef<FormatToken *> Tokens, 296 UnwrappedLineConsumer &Callback) 297 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 298 CurrentLines(&Lines), Style(Style), Keywords(Keywords), 299 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 300 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 301 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 302 ? IG_Rejected 303 : IG_Inited), 304 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} 305 306 void UnwrappedLineParser::reset() { 307 PPBranchLevel = -1; 308 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 309 ? IG_Rejected 310 : IG_Inited; 311 IncludeGuardToken = nullptr; 312 Line.reset(new UnwrappedLine); 313 CommentsBeforeNextToken.clear(); 314 FormatTok = nullptr; 315 MustBreakBeforeNextToken = false; 316 PreprocessorDirectives.clear(); 317 CurrentLines = &Lines; 318 DeclarationScopeStack.clear(); 319 PPStack.clear(); 320 Line->FirstStartColumn = FirstStartColumn; 321 } 322 323 void UnwrappedLineParser::parse() { 324 IndexedTokenSource TokenSource(AllTokens); 325 Line->FirstStartColumn = FirstStartColumn; 326 do { 327 LLVM_DEBUG(llvm::dbgs() << "----\n"); 328 reset(); 329 Tokens = &TokenSource; 330 TokenSource.reset(); 331 332 readToken(); 333 parseFile(); 334 335 // If we found an include guard then all preprocessor directives (other than 336 // the guard) are over-indented by one. 337 if (IncludeGuard == IG_Found) 338 for (auto &Line : Lines) 339 if (Line.InPPDirective && Line.Level > 0) 340 --Line.Level; 341 342 // Create line with eof token. 343 pushToken(FormatTok); 344 addUnwrappedLine(); 345 346 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 347 E = Lines.end(); 348 I != E; ++I) { 349 Callback.consumeUnwrappedLine(*I); 350 } 351 Callback.finishRun(); 352 Lines.clear(); 353 while (!PPLevelBranchIndex.empty() && 354 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 355 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 356 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 357 } 358 if (!PPLevelBranchIndex.empty()) { 359 ++PPLevelBranchIndex.back(); 360 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 361 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 362 } 363 } while (!PPLevelBranchIndex.empty()); 364 } 365 366 void UnwrappedLineParser::parseFile() { 367 // The top-level context in a file always has declarations, except for pre- 368 // processor directives and JavaScript files. 369 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 370 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 371 MustBeDeclaration); 372 if (Style.Language == FormatStyle::LK_TextProto) 373 parseBracedList(); 374 else 375 parseLevel(/*HasOpeningBrace=*/false); 376 // Make sure to format the remaining tokens. 377 // 378 // LK_TextProto is special since its top-level is parsed as the body of a 379 // braced list, which does not necessarily have natural line separators such 380 // as a semicolon. Comments after the last entry that have been determined to 381 // not belong to that line, as in: 382 // key: value 383 // // endfile comment 384 // do not have a chance to be put on a line of their own until this point. 385 // Here we add this newline before end-of-file comments. 386 if (Style.Language == FormatStyle::LK_TextProto && 387 !CommentsBeforeNextToken.empty()) 388 addUnwrappedLine(); 389 flushComments(true); 390 addUnwrappedLine(); 391 } 392 393 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 394 do { 395 switch (FormatTok->Tok.getKind()) { 396 case tok::l_brace: 397 return; 398 default: 399 if (FormatTok->is(Keywords.kw_where)) { 400 addUnwrappedLine(); 401 nextToken(); 402 parseCSharpGenericTypeConstraint(); 403 break; 404 } 405 nextToken(); 406 break; 407 } 408 } while (!eof()); 409 } 410 411 void UnwrappedLineParser::parseCSharpAttribute() { 412 int UnpairedSquareBrackets = 1; 413 do { 414 switch (FormatTok->Tok.getKind()) { 415 case tok::r_square: 416 nextToken(); 417 --UnpairedSquareBrackets; 418 if (UnpairedSquareBrackets == 0) { 419 addUnwrappedLine(); 420 return; 421 } 422 break; 423 case tok::l_square: 424 ++UnpairedSquareBrackets; 425 nextToken(); 426 break; 427 default: 428 nextToken(); 429 break; 430 } 431 } while (!eof()); 432 } 433 434 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 435 bool SwitchLabelEncountered = false; 436 do { 437 tok::TokenKind kind = FormatTok->Tok.getKind(); 438 if (FormatTok->getType() == TT_MacroBlockBegin) { 439 kind = tok::l_brace; 440 } else if (FormatTok->getType() == TT_MacroBlockEnd) { 441 kind = tok::r_brace; 442 } 443 444 switch (kind) { 445 case tok::comment: 446 nextToken(); 447 addUnwrappedLine(); 448 break; 449 case tok::l_brace: 450 // FIXME: Add parameter whether this can happen - if this happens, we must 451 // be in a non-declaration context. 452 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) 453 continue; 454 parseBlock(); 455 addUnwrappedLine(); 456 break; 457 case tok::r_brace: 458 if (HasOpeningBrace) 459 return; 460 nextToken(); 461 addUnwrappedLine(); 462 break; 463 case tok::kw_default: { 464 unsigned StoredPosition = Tokens->getPosition(); 465 FormatToken *Next; 466 do { 467 Next = Tokens->getNextToken(); 468 } while (Next->is(tok::comment)); 469 FormatTok = Tokens->setPosition(StoredPosition); 470 if (Next && Next->isNot(tok::colon)) { 471 // default not followed by ':' is not a case label; treat it like 472 // an identifier. 473 parseStructuralElement(); 474 break; 475 } 476 // Else, if it is 'default:', fall through to the case handling. 477 LLVM_FALLTHROUGH; 478 } 479 case tok::kw_case: 480 if (Style.isJavaScript() && Line->MustBeDeclaration) { 481 // A 'case: string' style field declaration. 482 parseStructuralElement(); 483 break; 484 } 485 if (!SwitchLabelEncountered && 486 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 487 ++Line->Level; 488 SwitchLabelEncountered = true; 489 parseStructuralElement(); 490 break; 491 case tok::l_square: 492 if (Style.isCSharp()) { 493 nextToken(); 494 parseCSharpAttribute(); 495 break; 496 } 497 LLVM_FALLTHROUGH; 498 default: 499 parseStructuralElement(!HasOpeningBrace); 500 break; 501 } 502 } while (!eof()); 503 } 504 505 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 506 // We'll parse forward through the tokens until we hit 507 // a closing brace or eof - note that getNextToken() will 508 // parse macros, so this will magically work inside macro 509 // definitions, too. 510 unsigned StoredPosition = Tokens->getPosition(); 511 FormatToken *Tok = FormatTok; 512 const FormatToken *PrevTok = Tok->Previous; 513 // Keep a stack of positions of lbrace tokens. We will 514 // update information about whether an lbrace starts a 515 // braced init list or a different block during the loop. 516 SmallVector<FormatToken *, 8> LBraceStack; 517 assert(Tok->Tok.is(tok::l_brace)); 518 do { 519 // Get next non-comment token. 520 FormatToken *NextTok; 521 unsigned ReadTokens = 0; 522 do { 523 NextTok = Tokens->getNextToken(); 524 ++ReadTokens; 525 } while (NextTok->is(tok::comment)); 526 527 switch (Tok->Tok.getKind()) { 528 case tok::l_brace: 529 if (Style.isJavaScript() && PrevTok) { 530 if (PrevTok->isOneOf(tok::colon, tok::less)) 531 // A ':' indicates this code is in a type, or a braced list 532 // following a label in an object literal ({a: {b: 1}}). 533 // A '<' could be an object used in a comparison, but that is nonsense 534 // code (can never return true), so more likely it is a generic type 535 // argument (`X<{a: string; b: number}>`). 536 // The code below could be confused by semicolons between the 537 // individual members in a type member list, which would normally 538 // trigger BK_Block. In both cases, this must be parsed as an inline 539 // braced init. 540 Tok->setBlockKind(BK_BracedInit); 541 else if (PrevTok->is(tok::r_paren)) 542 // `) { }` can only occur in function or method declarations in JS. 543 Tok->setBlockKind(BK_Block); 544 } else { 545 Tok->setBlockKind(BK_Unknown); 546 } 547 LBraceStack.push_back(Tok); 548 break; 549 case tok::r_brace: 550 if (LBraceStack.empty()) 551 break; 552 if (LBraceStack.back()->is(BK_Unknown)) { 553 bool ProbablyBracedList = false; 554 if (Style.Language == FormatStyle::LK_Proto) { 555 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 556 } else { 557 // Skip NextTok over preprocessor lines, otherwise we may not 558 // properly diagnose the block as a braced intializer 559 // if the comma separator appears after the pp directive. 560 while (NextTok->is(tok::hash)) { 561 ScopedMacroState MacroState(*Line, Tokens, NextTok); 562 do { 563 NextTok = Tokens->getNextToken(); 564 ++ReadTokens; 565 } while (NextTok->isNot(tok::eof)); 566 } 567 568 // Using OriginalColumn to distinguish between ObjC methods and 569 // binary operators is a bit hacky. 570 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 571 NextTok->OriginalColumn == 0; 572 573 // If there is a comma, semicolon or right paren after the closing 574 // brace, we assume this is a braced initializer list. Note that 575 // regardless how we mark inner braces here, we will overwrite the 576 // BlockKind later if we parse a braced list (where all blocks 577 // inside are by default braced lists), or when we explicitly detect 578 // blocks (for example while parsing lambdas). 579 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a 580 // braced list in JS. 581 ProbablyBracedList = 582 (Style.isJavaScript() && 583 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, 584 Keywords.kw_as)) || 585 (Style.isCpp() && NextTok->is(tok::l_paren)) || 586 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 587 tok::r_paren, tok::r_square, tok::l_brace, 588 tok::ellipsis) || 589 (NextTok->is(tok::identifier) && 590 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || 591 (NextTok->is(tok::semi) && 592 (!ExpectClassBody || LBraceStack.size() != 1)) || 593 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 594 if (!Style.isCSharp() && NextTok->is(tok::l_square)) { 595 // We can have an array subscript after a braced init 596 // list, but C++11 attributes are expected after blocks. 597 NextTok = Tokens->getNextToken(); 598 ++ReadTokens; 599 ProbablyBracedList = NextTok->isNot(tok::l_square); 600 } 601 } 602 if (ProbablyBracedList) { 603 Tok->setBlockKind(BK_BracedInit); 604 LBraceStack.back()->setBlockKind(BK_BracedInit); 605 } else { 606 Tok->setBlockKind(BK_Block); 607 LBraceStack.back()->setBlockKind(BK_Block); 608 } 609 } 610 LBraceStack.pop_back(); 611 break; 612 case tok::identifier: 613 if (!Tok->is(TT_StatementMacro)) 614 break; 615 LLVM_FALLTHROUGH; 616 case tok::at: 617 case tok::semi: 618 case tok::kw_if: 619 case tok::kw_while: 620 case tok::kw_for: 621 case tok::kw_switch: 622 case tok::kw_try: 623 case tok::kw___try: 624 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown)) 625 LBraceStack.back()->setBlockKind(BK_Block); 626 break; 627 default: 628 break; 629 } 630 PrevTok = Tok; 631 Tok = NextTok; 632 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 633 634 // Assume other blocks for all unclosed opening braces. 635 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 636 if (LBraceStack[i]->is(BK_Unknown)) 637 LBraceStack[i]->setBlockKind(BK_Block); 638 } 639 640 FormatTok = Tokens->setPosition(StoredPosition); 641 } 642 643 template <class T> 644 static inline void hash_combine(std::size_t &seed, const T &v) { 645 std::hash<T> hasher; 646 seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); 647 } 648 649 size_t UnwrappedLineParser::computePPHash() const { 650 size_t h = 0; 651 for (const auto &i : PPStack) { 652 hash_combine(h, size_t(i.Kind)); 653 hash_combine(h, i.Line); 654 } 655 return h; 656 } 657 658 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels, 659 bool MunchSemi, 660 bool UnindentWhitesmithsBraces) { 661 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && 662 "'{' or macro block token expected"); 663 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 664 FormatTok->setBlockKind(BK_Block); 665 666 // For Whitesmiths mode, jump to the next level prior to skipping over the 667 // braces. 668 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 669 ++Line->Level; 670 671 size_t PPStartHash = computePPHash(); 672 673 unsigned InitialLevel = Line->Level; 674 nextToken(/*LevelDifference=*/AddLevels); 675 676 if (MacroBlock && FormatTok->is(tok::l_paren)) 677 parseParens(); 678 679 size_t NbPreprocessorDirectives = 680 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; 681 addUnwrappedLine(); 682 size_t OpeningLineIndex = 683 CurrentLines->empty() 684 ? (UnwrappedLine::kInvalidIndex) 685 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 686 687 // Whitesmiths is weird here. The brace needs to be indented for the namespace 688 // block, but the block itself may not be indented depending on the style 689 // settings. This allows the format to back up one level in those cases. 690 if (UnindentWhitesmithsBraces) 691 --Line->Level; 692 693 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 694 MustBeDeclaration); 695 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 696 Line->Level += AddLevels; 697 parseLevel(/*HasOpeningBrace=*/true); 698 699 if (eof()) 700 return; 701 702 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) 703 : !FormatTok->is(tok::r_brace)) { 704 Line->Level = InitialLevel; 705 FormatTok->setBlockKind(BK_Block); 706 return; 707 } 708 709 size_t PPEndHash = computePPHash(); 710 711 // Munch the closing brace. 712 nextToken(/*LevelDifference=*/-AddLevels); 713 714 if (MacroBlock && FormatTok->is(tok::l_paren)) 715 parseParens(); 716 717 if (FormatTok->is(tok::arrow)) { 718 // Following the } we can find a trailing return type arrow 719 // as part of an implicit conversion constraint. 720 nextToken(); 721 parseStructuralElement(); 722 } 723 724 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 725 nextToken(); 726 727 Line->Level = InitialLevel; 728 729 if (PPStartHash == PPEndHash) { 730 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 731 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 732 // Update the opening line to add the forward reference as well 733 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 734 CurrentLines->size() - 1; 735 } 736 } 737 } 738 739 static bool isGoogScope(const UnwrappedLine &Line) { 740 // FIXME: Closure-library specific stuff should not be hard-coded but be 741 // configurable. 742 if (Line.Tokens.size() < 4) 743 return false; 744 auto I = Line.Tokens.begin(); 745 if (I->Tok->TokenText != "goog") 746 return false; 747 ++I; 748 if (I->Tok->isNot(tok::period)) 749 return false; 750 ++I; 751 if (I->Tok->TokenText != "scope") 752 return false; 753 ++I; 754 return I->Tok->is(tok::l_paren); 755 } 756 757 static bool isIIFE(const UnwrappedLine &Line, 758 const AdditionalKeywords &Keywords) { 759 // Look for the start of an immediately invoked anonymous function. 760 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 761 // This is commonly done in JavaScript to create a new, anonymous scope. 762 // Example: (function() { ... })() 763 if (Line.Tokens.size() < 3) 764 return false; 765 auto I = Line.Tokens.begin(); 766 if (I->Tok->isNot(tok::l_paren)) 767 return false; 768 ++I; 769 if (I->Tok->isNot(Keywords.kw_function)) 770 return false; 771 ++I; 772 return I->Tok->is(tok::l_paren); 773 } 774 775 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 776 const FormatToken &InitialToken) { 777 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro)) 778 return Style.BraceWrapping.AfterNamespace; 779 if (InitialToken.is(tok::kw_class)) 780 return Style.BraceWrapping.AfterClass; 781 if (InitialToken.is(tok::kw_union)) 782 return Style.BraceWrapping.AfterUnion; 783 if (InitialToken.is(tok::kw_struct)) 784 return Style.BraceWrapping.AfterStruct; 785 return false; 786 } 787 788 void UnwrappedLineParser::parseChildBlock() { 789 FormatTok->setBlockKind(BK_Block); 790 nextToken(); 791 { 792 bool SkipIndent = (Style.isJavaScript() && 793 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 794 ScopedLineState LineState(*this); 795 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 796 /*MustBeDeclaration=*/false); 797 Line->Level += SkipIndent ? 0 : 1; 798 parseLevel(/*HasOpeningBrace=*/true); 799 flushComments(isOnNewLine(*FormatTok)); 800 Line->Level -= SkipIndent ? 0 : 1; 801 } 802 nextToken(); 803 } 804 805 void UnwrappedLineParser::parsePPDirective() { 806 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 807 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 808 809 nextToken(); 810 811 if (!FormatTok->Tok.getIdentifierInfo()) { 812 parsePPUnknown(); 813 return; 814 } 815 816 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 817 case tok::pp_define: 818 parsePPDefine(); 819 return; 820 case tok::pp_if: 821 parsePPIf(/*IfDef=*/false); 822 break; 823 case tok::pp_ifdef: 824 case tok::pp_ifndef: 825 parsePPIf(/*IfDef=*/true); 826 break; 827 case tok::pp_else: 828 parsePPElse(); 829 break; 830 case tok::pp_elifdef: 831 case tok::pp_elifndef: 832 case tok::pp_elif: 833 parsePPElIf(); 834 break; 835 case tok::pp_endif: 836 parsePPEndIf(); 837 break; 838 default: 839 parsePPUnknown(); 840 break; 841 } 842 } 843 844 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 845 size_t Line = CurrentLines->size(); 846 if (CurrentLines == &PreprocessorDirectives) 847 Line += Lines.size(); 848 849 if (Unreachable || 850 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) 851 PPStack.push_back({PP_Unreachable, Line}); 852 else 853 PPStack.push_back({PP_Conditional, Line}); 854 } 855 856 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 857 ++PPBranchLevel; 858 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 859 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 860 PPLevelBranchIndex.push_back(0); 861 PPLevelBranchCount.push_back(0); 862 } 863 PPChainBranchIndex.push(0); 864 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 865 conditionalCompilationCondition(Unreachable || Skip); 866 } 867 868 void UnwrappedLineParser::conditionalCompilationAlternative() { 869 if (!PPStack.empty()) 870 PPStack.pop_back(); 871 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 872 if (!PPChainBranchIndex.empty()) 873 ++PPChainBranchIndex.top(); 874 conditionalCompilationCondition( 875 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 876 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 877 } 878 879 void UnwrappedLineParser::conditionalCompilationEnd() { 880 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 881 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 882 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 883 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 884 } 885 } 886 // Guard against #endif's without #if. 887 if (PPBranchLevel > -1) 888 --PPBranchLevel; 889 if (!PPChainBranchIndex.empty()) 890 PPChainBranchIndex.pop(); 891 if (!PPStack.empty()) 892 PPStack.pop_back(); 893 } 894 895 void UnwrappedLineParser::parsePPIf(bool IfDef) { 896 bool IfNDef = FormatTok->is(tok::pp_ifndef); 897 nextToken(); 898 bool Unreachable = false; 899 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 900 Unreachable = true; 901 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 902 Unreachable = true; 903 conditionalCompilationStart(Unreachable); 904 FormatToken *IfCondition = FormatTok; 905 // If there's a #ifndef on the first line, and the only lines before it are 906 // comments, it could be an include guard. 907 bool MaybeIncludeGuard = IfNDef; 908 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) 909 for (auto &Line : Lines) { 910 if (!Line.Tokens.front().Tok->is(tok::comment)) { 911 MaybeIncludeGuard = false; 912 IncludeGuard = IG_Rejected; 913 break; 914 } 915 } 916 --PPBranchLevel; 917 parsePPUnknown(); 918 ++PPBranchLevel; 919 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 920 IncludeGuard = IG_IfNdefed; 921 IncludeGuardToken = IfCondition; 922 } 923 } 924 925 void UnwrappedLineParser::parsePPElse() { 926 // If a potential include guard has an #else, it's not an include guard. 927 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 928 IncludeGuard = IG_Rejected; 929 conditionalCompilationAlternative(); 930 if (PPBranchLevel > -1) 931 --PPBranchLevel; 932 parsePPUnknown(); 933 ++PPBranchLevel; 934 } 935 936 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 937 938 void UnwrappedLineParser::parsePPEndIf() { 939 conditionalCompilationEnd(); 940 parsePPUnknown(); 941 // If the #endif of a potential include guard is the last thing in the file, 942 // then we found an include guard. 943 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 944 Style.IndentPPDirectives != FormatStyle::PPDIS_None) 945 IncludeGuard = IG_Found; 946 } 947 948 void UnwrappedLineParser::parsePPDefine() { 949 nextToken(); 950 951 if (!FormatTok->Tok.getIdentifierInfo()) { 952 IncludeGuard = IG_Rejected; 953 IncludeGuardToken = nullptr; 954 parsePPUnknown(); 955 return; 956 } 957 958 if (IncludeGuard == IG_IfNdefed && 959 IncludeGuardToken->TokenText == FormatTok->TokenText) { 960 IncludeGuard = IG_Defined; 961 IncludeGuardToken = nullptr; 962 for (auto &Line : Lines) { 963 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 964 IncludeGuard = IG_Rejected; 965 break; 966 } 967 } 968 } 969 970 nextToken(); 971 if (FormatTok->Tok.getKind() == tok::l_paren && 972 FormatTok->WhitespaceRange.getBegin() == 973 FormatTok->WhitespaceRange.getEnd()) { 974 parseParens(); 975 } 976 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 977 Line->Level += PPBranchLevel + 1; 978 addUnwrappedLine(); 979 ++Line->Level; 980 981 // Errors during a preprocessor directive can only affect the layout of the 982 // preprocessor directive, and thus we ignore them. An alternative approach 983 // would be to use the same approach we use on the file level (no 984 // re-indentation if there was a structural error) within the macro 985 // definition. 986 parseFile(); 987 } 988 989 void UnwrappedLineParser::parsePPUnknown() { 990 do { 991 nextToken(); 992 } while (!eof()); 993 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 994 Line->Level += PPBranchLevel + 1; 995 addUnwrappedLine(); 996 } 997 998 // Here we exclude certain tokens that are not usually the first token in an 999 // unwrapped line. This is used in attempt to distinguish macro calls without 1000 // trailing semicolons from other constructs split to several lines. 1001 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1002 // Semicolon can be a null-statement, l_square can be a start of a macro or 1003 // a C++11 attribute, but this doesn't seem to be common. 1004 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 1005 Tok.isNot(TT_AttributeSquare) && 1006 // Tokens that can only be used as binary operators and a part of 1007 // overloaded operator names. 1008 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 1009 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 1010 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 1011 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 1012 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 1013 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 1014 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 1015 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 1016 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 1017 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 1018 Tok.isNot(tok::lesslessequal) && 1019 // Colon is used in labels, base class lists, initializer lists, 1020 // range-based for loops, ternary operator, but should never be the 1021 // first token in an unwrapped line. 1022 Tok.isNot(tok::colon) && 1023 // 'noexcept' is a trailing annotation. 1024 Tok.isNot(tok::kw_noexcept); 1025 } 1026 1027 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1028 const FormatToken *FormatTok) { 1029 // FIXME: This returns true for C/C++ keywords like 'struct'. 1030 return FormatTok->is(tok::identifier) && 1031 (FormatTok->Tok.getIdentifierInfo() == nullptr || 1032 !FormatTok->isOneOf( 1033 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1034 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1035 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1036 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1037 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1038 Keywords.kw_instanceof, Keywords.kw_interface, 1039 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1040 } 1041 1042 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1043 const FormatToken *FormatTok) { 1044 return FormatTok->Tok.isLiteral() || 1045 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1046 mustBeJSIdent(Keywords, FormatTok); 1047 } 1048 1049 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1050 // when encountered after a value (see mustBeJSIdentOrValue). 1051 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1052 const FormatToken *FormatTok) { 1053 return FormatTok->isOneOf( 1054 tok::kw_return, Keywords.kw_yield, 1055 // conditionals 1056 tok::kw_if, tok::kw_else, 1057 // loops 1058 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1059 // switch/case 1060 tok::kw_switch, tok::kw_case, 1061 // exceptions 1062 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1063 // declaration 1064 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1065 Keywords.kw_async, Keywords.kw_function, 1066 // import/export 1067 Keywords.kw_import, tok::kw_export); 1068 } 1069 1070 // Checks whether a token is a type in K&R C (aka C78). 1071 static bool isC78Type(const FormatToken &Tok) { 1072 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1073 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1074 tok::identifier); 1075 } 1076 1077 // This function checks whether a token starts the first parameter declaration 1078 // in a K&R C (aka C78) function definition, e.g.: 1079 // int f(a, b) 1080 // short a, b; 1081 // { 1082 // return a + b; 1083 // } 1084 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1085 const FormatToken *FuncName) { 1086 assert(Tok); 1087 assert(Next); 1088 assert(FuncName); 1089 1090 if (FuncName->isNot(tok::identifier)) 1091 return false; 1092 1093 const FormatToken *Prev = FuncName->Previous; 1094 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1095 return false; 1096 1097 if (!isC78Type(*Tok) && 1098 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) 1099 return false; 1100 1101 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1102 return false; 1103 1104 Tok = Tok->Previous; 1105 if (!Tok || Tok->isNot(tok::r_paren)) 1106 return false; 1107 1108 Tok = Tok->Previous; 1109 if (!Tok || Tok->isNot(tok::identifier)) 1110 return false; 1111 1112 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1113 } 1114 1115 void UnwrappedLineParser::parseModuleImport() { 1116 nextToken(); 1117 while (!eof()) { 1118 if (FormatTok->is(tok::colon)) { 1119 FormatTok->setType(TT_ModulePartitionColon); 1120 } 1121 // Handle import <foo/bar.h> as we would an include statement. 1122 else if (FormatTok->is(tok::less)) { 1123 nextToken(); 1124 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1125 // Mark tokens up to the trailing line comments as implicit string 1126 // literals. 1127 if (FormatTok->isNot(tok::comment) && 1128 !FormatTok->TokenText.startswith("//")) 1129 FormatTok->setType(TT_ImplicitStringLiteral); 1130 nextToken(); 1131 } 1132 } 1133 if (FormatTok->is(tok::semi)) { 1134 nextToken(); 1135 break; 1136 } 1137 nextToken(); 1138 } 1139 1140 addUnwrappedLine(); 1141 } 1142 1143 // readTokenWithJavaScriptASI reads the next token and terminates the current 1144 // line if JavaScript Automatic Semicolon Insertion must 1145 // happen between the current token and the next token. 1146 // 1147 // This method is conservative - it cannot cover all edge cases of JavaScript, 1148 // but only aims to correctly handle certain well known cases. It *must not* 1149 // return true in speculative cases. 1150 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1151 FormatToken *Previous = FormatTok; 1152 readToken(); 1153 FormatToken *Next = FormatTok; 1154 1155 bool IsOnSameLine = 1156 CommentsBeforeNextToken.empty() 1157 ? Next->NewlinesBefore == 0 1158 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1159 if (IsOnSameLine) 1160 return; 1161 1162 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1163 bool PreviousStartsTemplateExpr = 1164 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); 1165 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1166 // If the line contains an '@' sign, the previous token might be an 1167 // annotation, which can precede another identifier/value. 1168 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1169 return LineNode.Tok->is(tok::at); 1170 }); 1171 if (HasAt) 1172 return; 1173 } 1174 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1175 return addUnwrappedLine(); 1176 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1177 bool NextEndsTemplateExpr = 1178 Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); 1179 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1180 (PreviousMustBeValue || 1181 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1182 tok::minusminus))) 1183 return addUnwrappedLine(); 1184 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1185 isJSDeclOrStmt(Keywords, Next)) 1186 return addUnwrappedLine(); 1187 } 1188 1189 void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { 1190 if (Style.Language == FormatStyle::LK_TableGen && 1191 FormatTok->is(tok::pp_include)) { 1192 nextToken(); 1193 if (FormatTok->is(tok::string_literal)) 1194 nextToken(); 1195 addUnwrappedLine(); 1196 return; 1197 } 1198 switch (FormatTok->Tok.getKind()) { 1199 case tok::kw_asm: 1200 nextToken(); 1201 if (FormatTok->is(tok::l_brace)) { 1202 FormatTok->setType(TT_InlineASMBrace); 1203 nextToken(); 1204 while (FormatTok && FormatTok->isNot(tok::eof)) { 1205 if (FormatTok->is(tok::r_brace)) { 1206 FormatTok->setType(TT_InlineASMBrace); 1207 nextToken(); 1208 addUnwrappedLine(); 1209 break; 1210 } 1211 FormatTok->Finalized = true; 1212 nextToken(); 1213 } 1214 } 1215 break; 1216 case tok::kw_namespace: 1217 parseNamespace(); 1218 return; 1219 case tok::kw_public: 1220 case tok::kw_protected: 1221 case tok::kw_private: 1222 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1223 Style.isCSharp()) 1224 nextToken(); 1225 else 1226 parseAccessSpecifier(); 1227 return; 1228 case tok::kw_if: 1229 if (Style.isJavaScript() && Line->MustBeDeclaration) 1230 // field/method declaration. 1231 break; 1232 parseIfThenElse(); 1233 return; 1234 case tok::kw_for: 1235 case tok::kw_while: 1236 if (Style.isJavaScript() && Line->MustBeDeclaration) 1237 // field/method declaration. 1238 break; 1239 parseForOrWhileLoop(); 1240 return; 1241 case tok::kw_do: 1242 if (Style.isJavaScript() && Line->MustBeDeclaration) 1243 // field/method declaration. 1244 break; 1245 parseDoWhile(); 1246 return; 1247 case tok::kw_switch: 1248 if (Style.isJavaScript() && Line->MustBeDeclaration) 1249 // 'switch: string' field declaration. 1250 break; 1251 parseSwitch(); 1252 return; 1253 case tok::kw_default: 1254 if (Style.isJavaScript() && Line->MustBeDeclaration) 1255 // 'default: string' field declaration. 1256 break; 1257 nextToken(); 1258 if (FormatTok->is(tok::colon)) { 1259 parseLabel(); 1260 return; 1261 } 1262 // e.g. "default void f() {}" in a Java interface. 1263 break; 1264 case tok::kw_case: 1265 if (Style.isJavaScript() && Line->MustBeDeclaration) 1266 // 'case: string' field declaration. 1267 break; 1268 parseCaseLabel(); 1269 return; 1270 case tok::kw_try: 1271 case tok::kw___try: 1272 if (Style.isJavaScript() && Line->MustBeDeclaration) 1273 // field/method declaration. 1274 break; 1275 parseTryCatch(); 1276 return; 1277 case tok::kw_extern: 1278 nextToken(); 1279 if (FormatTok->Tok.is(tok::string_literal)) { 1280 nextToken(); 1281 if (FormatTok->Tok.is(tok::l_brace)) { 1282 if (Style.BraceWrapping.AfterExternBlock) 1283 addUnwrappedLine(); 1284 // Either we indent or for backwards compatibility we follow the 1285 // AfterExternBlock style. 1286 unsigned AddLevels = 1287 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1288 (Style.BraceWrapping.AfterExternBlock && 1289 Style.IndentExternBlock == 1290 FormatStyle::IEBS_AfterExternBlock) 1291 ? 1u 1292 : 0u; 1293 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1294 addUnwrappedLine(); 1295 return; 1296 } 1297 } 1298 break; 1299 case tok::kw_export: 1300 if (Style.isJavaScript()) { 1301 parseJavaScriptEs6ImportExport(); 1302 return; 1303 } 1304 if (!Style.isCpp()) 1305 break; 1306 // Handle C++ "(inline|export) namespace". 1307 LLVM_FALLTHROUGH; 1308 case tok::kw_inline: 1309 nextToken(); 1310 if (FormatTok->Tok.is(tok::kw_namespace)) { 1311 parseNamespace(); 1312 return; 1313 } 1314 break; 1315 case tok::identifier: 1316 if (FormatTok->is(TT_ForEachMacro)) { 1317 parseForOrWhileLoop(); 1318 return; 1319 } 1320 if (FormatTok->is(TT_MacroBlockBegin)) { 1321 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1322 /*MunchSemi=*/false); 1323 return; 1324 } 1325 if (FormatTok->is(Keywords.kw_import)) { 1326 if (Style.isJavaScript()) { 1327 parseJavaScriptEs6ImportExport(); 1328 return; 1329 } 1330 if (Style.Language == FormatStyle::LK_Proto) { 1331 nextToken(); 1332 if (FormatTok->is(tok::kw_public)) 1333 nextToken(); 1334 if (!FormatTok->is(tok::string_literal)) 1335 return; 1336 nextToken(); 1337 if (FormatTok->is(tok::semi)) 1338 nextToken(); 1339 addUnwrappedLine(); 1340 return; 1341 } 1342 if (Style.isCpp()) { 1343 parseModuleImport(); 1344 return; 1345 } 1346 } 1347 if (Style.isCpp() && 1348 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1349 Keywords.kw_slots, Keywords.kw_qslots)) { 1350 nextToken(); 1351 if (FormatTok->is(tok::colon)) { 1352 nextToken(); 1353 addUnwrappedLine(); 1354 return; 1355 } 1356 } 1357 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1358 parseStatementMacro(); 1359 return; 1360 } 1361 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) { 1362 parseNamespace(); 1363 return; 1364 } 1365 // In all other cases, parse the declaration. 1366 break; 1367 default: 1368 break; 1369 } 1370 do { 1371 const FormatToken *Previous = FormatTok->Previous; 1372 switch (FormatTok->Tok.getKind()) { 1373 case tok::at: 1374 nextToken(); 1375 if (FormatTok->Tok.is(tok::l_brace)) { 1376 nextToken(); 1377 parseBracedList(); 1378 break; 1379 } else if (Style.Language == FormatStyle::LK_Java && 1380 FormatTok->is(Keywords.kw_interface)) { 1381 nextToken(); 1382 break; 1383 } 1384 switch (FormatTok->Tok.getObjCKeywordID()) { 1385 case tok::objc_public: 1386 case tok::objc_protected: 1387 case tok::objc_package: 1388 case tok::objc_private: 1389 return parseAccessSpecifier(); 1390 case tok::objc_interface: 1391 case tok::objc_implementation: 1392 return parseObjCInterfaceOrImplementation(); 1393 case tok::objc_protocol: 1394 if (parseObjCProtocol()) 1395 return; 1396 break; 1397 case tok::objc_end: 1398 return; // Handled by the caller. 1399 case tok::objc_optional: 1400 case tok::objc_required: 1401 nextToken(); 1402 addUnwrappedLine(); 1403 return; 1404 case tok::objc_autoreleasepool: 1405 nextToken(); 1406 if (FormatTok->Tok.is(tok::l_brace)) { 1407 if (Style.BraceWrapping.AfterControlStatement == 1408 FormatStyle::BWACS_Always) 1409 addUnwrappedLine(); 1410 parseBlock(); 1411 } 1412 addUnwrappedLine(); 1413 return; 1414 case tok::objc_synchronized: 1415 nextToken(); 1416 if (FormatTok->Tok.is(tok::l_paren)) 1417 // Skip synchronization object 1418 parseParens(); 1419 if (FormatTok->Tok.is(tok::l_brace)) { 1420 if (Style.BraceWrapping.AfterControlStatement == 1421 FormatStyle::BWACS_Always) 1422 addUnwrappedLine(); 1423 parseBlock(); 1424 } 1425 addUnwrappedLine(); 1426 return; 1427 case tok::objc_try: 1428 // This branch isn't strictly necessary (the kw_try case below would 1429 // do this too after the tok::at is parsed above). But be explicit. 1430 parseTryCatch(); 1431 return; 1432 default: 1433 break; 1434 } 1435 break; 1436 case tok::kw_concept: 1437 parseConcept(); 1438 return; 1439 case tok::kw_requires: 1440 parseRequires(); 1441 return; 1442 case tok::kw_enum: 1443 // Ignore if this is part of "template <enum ...". 1444 if (Previous && Previous->is(tok::less)) { 1445 nextToken(); 1446 break; 1447 } 1448 1449 // parseEnum falls through and does not yet add an unwrapped line as an 1450 // enum definition can start a structural element. 1451 if (!parseEnum()) 1452 break; 1453 // This only applies for C++. 1454 if (!Style.isCpp()) { 1455 addUnwrappedLine(); 1456 return; 1457 } 1458 break; 1459 case tok::kw_typedef: 1460 nextToken(); 1461 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1462 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1463 Keywords.kw_CF_CLOSED_ENUM, 1464 Keywords.kw_NS_CLOSED_ENUM)) 1465 parseEnum(); 1466 break; 1467 case tok::kw_struct: 1468 case tok::kw_union: 1469 case tok::kw_class: 1470 if (parseStructLike()) { 1471 return; 1472 } 1473 break; 1474 case tok::period: 1475 nextToken(); 1476 // In Java, classes have an implicit static member "class". 1477 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1478 FormatTok->is(tok::kw_class)) 1479 nextToken(); 1480 if (Style.isJavaScript() && FormatTok && 1481 FormatTok->Tok.getIdentifierInfo()) 1482 // JavaScript only has pseudo keywords, all keywords are allowed to 1483 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1484 nextToken(); 1485 break; 1486 case tok::semi: 1487 nextToken(); 1488 addUnwrappedLine(); 1489 return; 1490 case tok::r_brace: 1491 addUnwrappedLine(); 1492 return; 1493 case tok::l_paren: { 1494 parseParens(); 1495 // Break the unwrapped line if a K&R C function definition has a parameter 1496 // declaration. 1497 if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof)) 1498 break; 1499 if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) { 1500 addUnwrappedLine(); 1501 return; 1502 } 1503 break; 1504 } 1505 case tok::kw_operator: 1506 nextToken(); 1507 if (FormatTok->isBinaryOperator()) 1508 nextToken(); 1509 break; 1510 case tok::caret: 1511 nextToken(); 1512 if (FormatTok->Tok.isAnyIdentifier() || 1513 FormatTok->isSimpleTypeSpecifier()) 1514 nextToken(); 1515 if (FormatTok->is(tok::l_paren)) 1516 parseParens(); 1517 if (FormatTok->is(tok::l_brace)) 1518 parseChildBlock(); 1519 break; 1520 case tok::l_brace: 1521 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1522 // A block outside of parentheses must be the last part of a 1523 // structural element. 1524 // FIXME: Figure out cases where this is not true, and add projections 1525 // for them (the one we know is missing are lambdas). 1526 if (Style.BraceWrapping.AfterFunction) 1527 addUnwrappedLine(); 1528 FormatTok->setType(TT_FunctionLBrace); 1529 parseBlock(); 1530 addUnwrappedLine(); 1531 return; 1532 } 1533 // Otherwise this was a braced init list, and the structural 1534 // element continues. 1535 break; 1536 case tok::kw_try: 1537 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1538 // field/method declaration. 1539 nextToken(); 1540 break; 1541 } 1542 // We arrive here when parsing function-try blocks. 1543 if (Style.BraceWrapping.AfterFunction) 1544 addUnwrappedLine(); 1545 parseTryCatch(); 1546 return; 1547 case tok::identifier: { 1548 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1549 Line->MustBeDeclaration) { 1550 addUnwrappedLine(); 1551 parseCSharpGenericTypeConstraint(); 1552 break; 1553 } 1554 if (FormatTok->is(TT_MacroBlockEnd)) { 1555 addUnwrappedLine(); 1556 return; 1557 } 1558 1559 // Function declarations (as opposed to function expressions) are parsed 1560 // on their own unwrapped line by continuing this loop. Function 1561 // expressions (functions that are not on their own line) must not create 1562 // a new unwrapped line, so they are special cased below. 1563 size_t TokenCount = Line->Tokens.size(); 1564 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1565 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( 1566 Keywords.kw_async)))) { 1567 tryToParseJSFunction(); 1568 break; 1569 } 1570 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1571 FormatTok->is(Keywords.kw_interface)) { 1572 if (Style.isJavaScript()) { 1573 // In JavaScript/TypeScript, "interface" can be used as a standalone 1574 // identifier, e.g. in `var interface = 1;`. If "interface" is 1575 // followed by another identifier, it is very like to be an actual 1576 // interface declaration. 1577 unsigned StoredPosition = Tokens->getPosition(); 1578 FormatToken *Next = Tokens->getNextToken(); 1579 FormatTok = Tokens->setPosition(StoredPosition); 1580 if (!mustBeJSIdent(Keywords, Next)) { 1581 nextToken(); 1582 break; 1583 } 1584 } 1585 parseRecord(); 1586 addUnwrappedLine(); 1587 return; 1588 } 1589 1590 if (FormatTok->is(Keywords.kw_interface)) { 1591 if (parseStructLike()) { 1592 return; 1593 } 1594 break; 1595 } 1596 1597 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { 1598 parseStatementMacro(); 1599 return; 1600 } 1601 1602 // See if the following token should start a new unwrapped line. 1603 StringRef Text = FormatTok->TokenText; 1604 nextToken(); 1605 1606 // JS doesn't have macros, and within classes colons indicate fields, not 1607 // labels. 1608 if (Style.isJavaScript()) 1609 break; 1610 1611 TokenCount = Line->Tokens.size(); 1612 if (TokenCount == 1 || 1613 (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) { 1614 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 1615 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1616 parseLabel(!Style.IndentGotoLabels); 1617 return; 1618 } 1619 // Recognize function-like macro usages without trailing semicolon as 1620 // well as free-standing macros like Q_OBJECT. 1621 bool FunctionLike = FormatTok->is(tok::l_paren); 1622 if (FunctionLike) 1623 parseParens(); 1624 1625 bool FollowedByNewline = 1626 CommentsBeforeNextToken.empty() 1627 ? FormatTok->NewlinesBefore > 0 1628 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 1629 1630 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 1631 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 1632 addUnwrappedLine(); 1633 return; 1634 } 1635 } 1636 break; 1637 } 1638 case tok::equal: 1639 if ((Style.isJavaScript() || Style.isCSharp()) && 1640 FormatTok->is(TT_FatArrow)) { 1641 tryToParseChildBlock(); 1642 break; 1643 } 1644 1645 nextToken(); 1646 if (FormatTok->Tok.is(tok::l_brace)) { 1647 // Block kind should probably be set to BK_BracedInit for any language. 1648 // C# needs this change to ensure that array initialisers and object 1649 // initialisers are indented the same way. 1650 if (Style.isCSharp()) 1651 FormatTok->setBlockKind(BK_BracedInit); 1652 nextToken(); 1653 parseBracedList(); 1654 } else if (Style.Language == FormatStyle::LK_Proto && 1655 FormatTok->Tok.is(tok::less)) { 1656 nextToken(); 1657 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 1658 /*ClosingBraceKind=*/tok::greater); 1659 } 1660 break; 1661 case tok::l_square: 1662 parseSquare(); 1663 break; 1664 case tok::kw_new: 1665 parseNew(); 1666 break; 1667 default: 1668 nextToken(); 1669 break; 1670 } 1671 } while (!eof()); 1672 } 1673 1674 bool UnwrappedLineParser::tryToParsePropertyAccessor() { 1675 assert(FormatTok->is(tok::l_brace)); 1676 if (!Style.isCSharp()) 1677 return false; 1678 // See if it's a property accessor. 1679 if (FormatTok->Previous->isNot(tok::identifier)) 1680 return false; 1681 1682 // See if we are inside a property accessor. 1683 // 1684 // Record the current tokenPosition so that we can advance and 1685 // reset the current token. `Next` is not set yet so we need 1686 // another way to advance along the token stream. 1687 unsigned int StoredPosition = Tokens->getPosition(); 1688 FormatToken *Tok = Tokens->getNextToken(); 1689 1690 // A trivial property accessor is of the form: 1691 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] } 1692 // Track these as they do not require line breaks to be introduced. 1693 bool HasGetOrSet = false; 1694 bool IsTrivialPropertyAccessor = true; 1695 while (!eof()) { 1696 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private, 1697 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get, 1698 Keywords.kw_set)) { 1699 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set)) 1700 HasGetOrSet = true; 1701 Tok = Tokens->getNextToken(); 1702 continue; 1703 } 1704 if (Tok->isNot(tok::r_brace)) 1705 IsTrivialPropertyAccessor = false; 1706 break; 1707 } 1708 1709 if (!HasGetOrSet) { 1710 Tokens->setPosition(StoredPosition); 1711 return false; 1712 } 1713 1714 // Try to parse the property accessor: 1715 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 1716 Tokens->setPosition(StoredPosition); 1717 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 1718 addUnwrappedLine(); 1719 nextToken(); 1720 do { 1721 switch (FormatTok->Tok.getKind()) { 1722 case tok::r_brace: 1723 nextToken(); 1724 if (FormatTok->is(tok::equal)) { 1725 while (!eof() && FormatTok->isNot(tok::semi)) 1726 nextToken(); 1727 nextToken(); 1728 } 1729 addUnwrappedLine(); 1730 return true; 1731 case tok::l_brace: 1732 ++Line->Level; 1733 parseBlock(/*MustBeDeclaration=*/true); 1734 addUnwrappedLine(); 1735 --Line->Level; 1736 break; 1737 case tok::equal: 1738 if (FormatTok->is(TT_FatArrow)) { 1739 ++Line->Level; 1740 do { 1741 nextToken(); 1742 } while (!eof() && FormatTok->isNot(tok::semi)); 1743 nextToken(); 1744 addUnwrappedLine(); 1745 --Line->Level; 1746 break; 1747 } 1748 nextToken(); 1749 break; 1750 default: 1751 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) && 1752 !IsTrivialPropertyAccessor) { 1753 // Non-trivial get/set needs to be on its own line. 1754 addUnwrappedLine(); 1755 } 1756 nextToken(); 1757 } 1758 } while (!eof()); 1759 1760 // Unreachable for well-formed code (paired '{' and '}'). 1761 return true; 1762 } 1763 1764 bool UnwrappedLineParser::tryToParseLambda() { 1765 if (!Style.isCpp()) { 1766 nextToken(); 1767 return false; 1768 } 1769 assert(FormatTok->is(tok::l_square)); 1770 FormatToken &LSquare = *FormatTok; 1771 if (!tryToParseLambdaIntroducer()) 1772 return false; 1773 1774 bool SeenArrow = false; 1775 1776 while (FormatTok->isNot(tok::l_brace)) { 1777 if (FormatTok->isSimpleTypeSpecifier()) { 1778 nextToken(); 1779 continue; 1780 } 1781 switch (FormatTok->Tok.getKind()) { 1782 case tok::l_brace: 1783 break; 1784 case tok::l_paren: 1785 parseParens(); 1786 break; 1787 case tok::amp: 1788 case tok::star: 1789 case tok::kw_const: 1790 case tok::comma: 1791 case tok::less: 1792 case tok::greater: 1793 case tok::identifier: 1794 case tok::numeric_constant: 1795 case tok::coloncolon: 1796 case tok::kw_class: 1797 case tok::kw_mutable: 1798 case tok::kw_noexcept: 1799 case tok::kw_template: 1800 case tok::kw_typename: 1801 nextToken(); 1802 break; 1803 // Specialization of a template with an integer parameter can contain 1804 // arithmetic, logical, comparison and ternary operators. 1805 // 1806 // FIXME: This also accepts sequences of operators that are not in the scope 1807 // of a template argument list. 1808 // 1809 // In a C++ lambda a template type can only occur after an arrow. We use 1810 // this as an heuristic to distinguish between Objective-C expressions 1811 // followed by an `a->b` expression, such as: 1812 // ([obj func:arg] + a->b) 1813 // Otherwise the code below would parse as a lambda. 1814 // 1815 // FIXME: This heuristic is incorrect for C++20 generic lambdas with 1816 // explicit template lists: []<bool b = true && false>(U &&u){} 1817 case tok::plus: 1818 case tok::minus: 1819 case tok::exclaim: 1820 case tok::tilde: 1821 case tok::slash: 1822 case tok::percent: 1823 case tok::lessless: 1824 case tok::pipe: 1825 case tok::pipepipe: 1826 case tok::ampamp: 1827 case tok::caret: 1828 case tok::equalequal: 1829 case tok::exclaimequal: 1830 case tok::greaterequal: 1831 case tok::lessequal: 1832 case tok::question: 1833 case tok::colon: 1834 case tok::ellipsis: 1835 case tok::kw_true: 1836 case tok::kw_false: 1837 if (SeenArrow) { 1838 nextToken(); 1839 break; 1840 } 1841 return true; 1842 case tok::arrow: 1843 // This might or might not actually be a lambda arrow (this could be an 1844 // ObjC method invocation followed by a dereferencing arrow). We might 1845 // reset this back to TT_Unknown in TokenAnnotator. 1846 FormatTok->setType(TT_LambdaArrow); 1847 SeenArrow = true; 1848 nextToken(); 1849 break; 1850 default: 1851 return true; 1852 } 1853 } 1854 FormatTok->setType(TT_LambdaLBrace); 1855 LSquare.setType(TT_LambdaLSquare); 1856 parseChildBlock(); 1857 return true; 1858 } 1859 1860 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 1861 const FormatToken *Previous = FormatTok->Previous; 1862 if (Previous && 1863 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, 1864 tok::kw_delete, tok::l_square) || 1865 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || 1866 Previous->isSimpleTypeSpecifier())) { 1867 nextToken(); 1868 return false; 1869 } 1870 nextToken(); 1871 if (FormatTok->is(tok::l_square)) { 1872 return false; 1873 } 1874 parseSquare(/*LambdaIntroducer=*/true); 1875 return true; 1876 } 1877 1878 void UnwrappedLineParser::tryToParseJSFunction() { 1879 assert(FormatTok->is(Keywords.kw_function) || 1880 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); 1881 if (FormatTok->is(Keywords.kw_async)) 1882 nextToken(); 1883 // Consume "function". 1884 nextToken(); 1885 1886 // Consume * (generator function). Treat it like C++'s overloaded operators. 1887 if (FormatTok->is(tok::star)) { 1888 FormatTok->setType(TT_OverloadedOperator); 1889 nextToken(); 1890 } 1891 1892 // Consume function name. 1893 if (FormatTok->is(tok::identifier)) 1894 nextToken(); 1895 1896 if (FormatTok->isNot(tok::l_paren)) 1897 return; 1898 1899 // Parse formal parameter list. 1900 parseParens(); 1901 1902 if (FormatTok->is(tok::colon)) { 1903 // Parse a type definition. 1904 nextToken(); 1905 1906 // Eat the type declaration. For braced inline object types, balance braces, 1907 // otherwise just parse until finding an l_brace for the function body. 1908 if (FormatTok->is(tok::l_brace)) 1909 tryToParseBracedList(); 1910 else 1911 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 1912 nextToken(); 1913 } 1914 1915 if (FormatTok->is(tok::semi)) 1916 return; 1917 1918 parseChildBlock(); 1919 } 1920 1921 bool UnwrappedLineParser::tryToParseBracedList() { 1922 if (FormatTok->is(BK_Unknown)) 1923 calculateBraceTypes(); 1924 assert(FormatTok->isNot(BK_Unknown)); 1925 if (FormatTok->is(BK_Block)) 1926 return false; 1927 nextToken(); 1928 parseBracedList(); 1929 return true; 1930 } 1931 1932 bool UnwrappedLineParser::tryToParseChildBlock() { 1933 assert(Style.isJavaScript() || Style.isCSharp()); 1934 assert(FormatTok->is(TT_FatArrow)); 1935 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 1936 // They always start an expression or a child block if followed by a curly 1937 // brace. 1938 nextToken(); 1939 if (FormatTok->isNot(tok::l_brace)) 1940 return false; 1941 parseChildBlock(); 1942 return true; 1943 } 1944 1945 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, 1946 bool IsEnum, 1947 tok::TokenKind ClosingBraceKind) { 1948 bool HasError = false; 1949 1950 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1951 // replace this by using parseAssignmentExpression() inside. 1952 do { 1953 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 1954 tryToParseChildBlock()) 1955 continue; 1956 if (Style.isJavaScript()) { 1957 if (FormatTok->is(Keywords.kw_function) || 1958 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { 1959 tryToParseJSFunction(); 1960 continue; 1961 } 1962 if (FormatTok->is(tok::l_brace)) { 1963 // Could be a method inside of a braced list `{a() { return 1; }}`. 1964 if (tryToParseBracedList()) 1965 continue; 1966 parseChildBlock(); 1967 } 1968 } 1969 if (FormatTok->Tok.getKind() == ClosingBraceKind) { 1970 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 1971 addUnwrappedLine(); 1972 nextToken(); 1973 return !HasError; 1974 } 1975 switch (FormatTok->Tok.getKind()) { 1976 case tok::l_square: 1977 if (Style.isCSharp()) 1978 parseSquare(); 1979 else 1980 tryToParseLambda(); 1981 break; 1982 case tok::l_paren: 1983 parseParens(); 1984 // JavaScript can just have free standing methods and getters/setters in 1985 // object literals. Detect them by a "{" following ")". 1986 if (Style.isJavaScript()) { 1987 if (FormatTok->is(tok::l_brace)) 1988 parseChildBlock(); 1989 break; 1990 } 1991 break; 1992 case tok::l_brace: 1993 // Assume there are no blocks inside a braced init list apart 1994 // from the ones we explicitly parse out (like lambdas). 1995 FormatTok->setBlockKind(BK_BracedInit); 1996 nextToken(); 1997 parseBracedList(); 1998 break; 1999 case tok::less: 2000 if (Style.Language == FormatStyle::LK_Proto) { 2001 nextToken(); 2002 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2003 /*ClosingBraceKind=*/tok::greater); 2004 } else { 2005 nextToken(); 2006 } 2007 break; 2008 case tok::semi: 2009 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2010 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2011 // used for error recovery if we have otherwise determined that this is 2012 // a braced list. 2013 if (Style.isJavaScript()) { 2014 nextToken(); 2015 break; 2016 } 2017 HasError = true; 2018 if (!ContinueOnSemicolons) 2019 return !HasError; 2020 nextToken(); 2021 break; 2022 case tok::comma: 2023 nextToken(); 2024 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2025 addUnwrappedLine(); 2026 break; 2027 default: 2028 nextToken(); 2029 break; 2030 } 2031 } while (!eof()); 2032 return false; 2033 } 2034 2035 void UnwrappedLineParser::parseParens() { 2036 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 2037 nextToken(); 2038 do { 2039 switch (FormatTok->Tok.getKind()) { 2040 case tok::l_paren: 2041 parseParens(); 2042 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2043 parseChildBlock(); 2044 break; 2045 case tok::r_paren: 2046 nextToken(); 2047 return; 2048 case tok::r_brace: 2049 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2050 return; 2051 case tok::l_square: 2052 tryToParseLambda(); 2053 break; 2054 case tok::l_brace: 2055 if (!tryToParseBracedList()) 2056 parseChildBlock(); 2057 break; 2058 case tok::at: 2059 nextToken(); 2060 if (FormatTok->Tok.is(tok::l_brace)) { 2061 nextToken(); 2062 parseBracedList(); 2063 } 2064 break; 2065 case tok::equal: 2066 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2067 tryToParseChildBlock(); 2068 else 2069 nextToken(); 2070 break; 2071 case tok::kw_class: 2072 if (Style.isJavaScript()) 2073 parseRecord(/*ParseAsExpr=*/true); 2074 else 2075 nextToken(); 2076 break; 2077 case tok::identifier: 2078 if (Style.isJavaScript() && 2079 (FormatTok->is(Keywords.kw_function) || 2080 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) 2081 tryToParseJSFunction(); 2082 else 2083 nextToken(); 2084 break; 2085 default: 2086 nextToken(); 2087 break; 2088 } 2089 } while (!eof()); 2090 } 2091 2092 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2093 if (!LambdaIntroducer) { 2094 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 2095 if (tryToParseLambda()) 2096 return; 2097 } 2098 do { 2099 switch (FormatTok->Tok.getKind()) { 2100 case tok::l_paren: 2101 parseParens(); 2102 break; 2103 case tok::r_square: 2104 nextToken(); 2105 return; 2106 case tok::r_brace: 2107 // A "}" inside parenthesis is an error if there wasn't a matching "{". 2108 return; 2109 case tok::l_square: 2110 parseSquare(); 2111 break; 2112 case tok::l_brace: { 2113 if (!tryToParseBracedList()) 2114 parseChildBlock(); 2115 break; 2116 } 2117 case tok::at: 2118 nextToken(); 2119 if (FormatTok->Tok.is(tok::l_brace)) { 2120 nextToken(); 2121 parseBracedList(); 2122 } 2123 break; 2124 default: 2125 nextToken(); 2126 break; 2127 } 2128 } while (!eof()); 2129 } 2130 2131 void UnwrappedLineParser::parseIfThenElse() { 2132 auto HandleAttributes = [this]() { 2133 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2134 if (FormatTok->is(TT_AttributeMacro)) 2135 nextToken(); 2136 // Handle [[likely]] / [[unlikely]] attributes. 2137 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) 2138 parseSquare(); 2139 }; 2140 2141 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 2142 nextToken(); 2143 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier)) 2144 nextToken(); 2145 if (FormatTok->Tok.is(tok::l_paren)) 2146 parseParens(); 2147 HandleAttributes(); 2148 bool NeedsUnwrappedLine = false; 2149 if (FormatTok->Tok.is(tok::l_brace)) { 2150 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2151 parseBlock(); 2152 if (Style.BraceWrapping.BeforeElse) 2153 addUnwrappedLine(); 2154 else 2155 NeedsUnwrappedLine = true; 2156 } else { 2157 addUnwrappedLine(); 2158 ++Line->Level; 2159 parseStructuralElement(); 2160 --Line->Level; 2161 } 2162 if (FormatTok->Tok.is(tok::kw_else)) { 2163 nextToken(); 2164 HandleAttributes(); 2165 if (FormatTok->Tok.is(tok::l_brace)) { 2166 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2167 parseBlock(); 2168 addUnwrappedLine(); 2169 } else if (FormatTok->Tok.is(tok::kw_if)) { 2170 FormatToken *Previous = Tokens->getPreviousToken(); 2171 bool PrecededByComment = Previous && Previous->is(tok::comment); 2172 if (PrecededByComment) { 2173 addUnwrappedLine(); 2174 ++Line->Level; 2175 } 2176 parseIfThenElse(); 2177 if (PrecededByComment) 2178 --Line->Level; 2179 } else { 2180 addUnwrappedLine(); 2181 ++Line->Level; 2182 parseStructuralElement(); 2183 if (FormatTok->is(tok::eof)) 2184 addUnwrappedLine(); 2185 --Line->Level; 2186 } 2187 } else if (NeedsUnwrappedLine) { 2188 addUnwrappedLine(); 2189 } 2190 } 2191 2192 void UnwrappedLineParser::parseTryCatch() { 2193 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 2194 nextToken(); 2195 bool NeedsUnwrappedLine = false; 2196 if (FormatTok->is(tok::colon)) { 2197 // We are in a function try block, what comes is an initializer list. 2198 nextToken(); 2199 2200 // In case identifiers were removed by clang-tidy, what might follow is 2201 // multiple commas in sequence - before the first identifier. 2202 while (FormatTok->is(tok::comma)) 2203 nextToken(); 2204 2205 while (FormatTok->is(tok::identifier)) { 2206 nextToken(); 2207 if (FormatTok->is(tok::l_paren)) 2208 parseParens(); 2209 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) && 2210 FormatTok->is(tok::l_brace)) { 2211 do { 2212 nextToken(); 2213 } while (!FormatTok->is(tok::r_brace)); 2214 nextToken(); 2215 } 2216 2217 // In case identifiers were removed by clang-tidy, what might follow is 2218 // multiple commas in sequence - after the first identifier. 2219 while (FormatTok->is(tok::comma)) 2220 nextToken(); 2221 } 2222 } 2223 // Parse try with resource. 2224 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 2225 parseParens(); 2226 } 2227 if (FormatTok->is(tok::l_brace)) { 2228 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2229 parseBlock(); 2230 if (Style.BraceWrapping.BeforeCatch) { 2231 addUnwrappedLine(); 2232 } else { 2233 NeedsUnwrappedLine = true; 2234 } 2235 } else if (!FormatTok->is(tok::kw_catch)) { 2236 // The C++ standard requires a compound-statement after a try. 2237 // If there's none, we try to assume there's a structuralElement 2238 // and try to continue. 2239 addUnwrappedLine(); 2240 ++Line->Level; 2241 parseStructuralElement(); 2242 --Line->Level; 2243 } 2244 while (1) { 2245 if (FormatTok->is(tok::at)) 2246 nextToken(); 2247 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 2248 tok::kw___finally) || 2249 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2250 FormatTok->is(Keywords.kw_finally)) || 2251 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 2252 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 2253 break; 2254 nextToken(); 2255 while (FormatTok->isNot(tok::l_brace)) { 2256 if (FormatTok->is(tok::l_paren)) { 2257 parseParens(); 2258 continue; 2259 } 2260 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 2261 return; 2262 nextToken(); 2263 } 2264 NeedsUnwrappedLine = false; 2265 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2266 parseBlock(); 2267 if (Style.BraceWrapping.BeforeCatch) 2268 addUnwrappedLine(); 2269 else 2270 NeedsUnwrappedLine = true; 2271 } 2272 if (NeedsUnwrappedLine) 2273 addUnwrappedLine(); 2274 } 2275 2276 void UnwrappedLineParser::parseNamespace() { 2277 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 2278 "'namespace' expected"); 2279 2280 const FormatToken &InitialToken = *FormatTok; 2281 nextToken(); 2282 if (InitialToken.is(TT_NamespaceMacro)) { 2283 parseParens(); 2284 } else { 2285 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline, 2286 tok::l_square, tok::period)) { 2287 if (FormatTok->is(tok::l_square)) 2288 parseSquare(); 2289 else 2290 nextToken(); 2291 } 2292 } 2293 if (FormatTok->Tok.is(tok::l_brace)) { 2294 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2295 addUnwrappedLine(); 2296 2297 unsigned AddLevels = 2298 Style.NamespaceIndentation == FormatStyle::NI_All || 2299 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 2300 DeclarationScopeStack.size() > 1) 2301 ? 1u 2302 : 0u; 2303 bool ManageWhitesmithsBraces = 2304 AddLevels == 0u && 2305 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 2306 2307 // If we're in Whitesmiths mode, indent the brace if we're not indenting 2308 // the whole block. 2309 if (ManageWhitesmithsBraces) 2310 ++Line->Level; 2311 2312 parseBlock(/*MustBeDeclaration=*/true, AddLevels, 2313 /*MunchSemi=*/true, 2314 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces); 2315 2316 // Munch the semicolon after a namespace. This is more common than one would 2317 // think. Putting the semicolon into its own line is very ugly. 2318 if (FormatTok->Tok.is(tok::semi)) 2319 nextToken(); 2320 2321 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep); 2322 2323 if (ManageWhitesmithsBraces) 2324 --Line->Level; 2325 } 2326 // FIXME: Add error handling. 2327 } 2328 2329 void UnwrappedLineParser::parseNew() { 2330 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 2331 nextToken(); 2332 2333 if (Style.isCSharp()) { 2334 do { 2335 if (FormatTok->is(tok::l_brace)) 2336 parseBracedList(); 2337 2338 if (FormatTok->isOneOf(tok::semi, tok::comma)) 2339 return; 2340 2341 nextToken(); 2342 } while (!eof()); 2343 } 2344 2345 if (Style.Language != FormatStyle::LK_Java) 2346 return; 2347 2348 // In Java, we can parse everything up to the parens, which aren't optional. 2349 do { 2350 // There should not be a ;, { or } before the new's open paren. 2351 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 2352 return; 2353 2354 // Consume the parens. 2355 if (FormatTok->is(tok::l_paren)) { 2356 parseParens(); 2357 2358 // If there is a class body of an anonymous class, consume that as child. 2359 if (FormatTok->is(tok::l_brace)) 2360 parseChildBlock(); 2361 return; 2362 } 2363 nextToken(); 2364 } while (!eof()); 2365 } 2366 2367 void UnwrappedLineParser::parseForOrWhileLoop() { 2368 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 2369 "'for', 'while' or foreach macro expected"); 2370 nextToken(); 2371 // JS' for await ( ... 2372 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await)) 2373 nextToken(); 2374 if (Style.isCpp() && FormatTok->is(tok::kw_co_await)) 2375 nextToken(); 2376 if (FormatTok->Tok.is(tok::l_paren)) 2377 parseParens(); 2378 if (FormatTok->Tok.is(tok::l_brace)) { 2379 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2380 parseBlock(); 2381 addUnwrappedLine(); 2382 } else { 2383 addUnwrappedLine(); 2384 ++Line->Level; 2385 parseStructuralElement(); 2386 --Line->Level; 2387 } 2388 } 2389 2390 void UnwrappedLineParser::parseDoWhile() { 2391 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 2392 nextToken(); 2393 if (FormatTok->Tok.is(tok::l_brace)) { 2394 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2395 parseBlock(); 2396 if (Style.BraceWrapping.BeforeWhile) 2397 addUnwrappedLine(); 2398 } else { 2399 addUnwrappedLine(); 2400 ++Line->Level; 2401 parseStructuralElement(); 2402 --Line->Level; 2403 } 2404 2405 // FIXME: Add error handling. 2406 if (!FormatTok->Tok.is(tok::kw_while)) { 2407 addUnwrappedLine(); 2408 return; 2409 } 2410 2411 // If in Whitesmiths mode, the line with the while() needs to be indented 2412 // to the same level as the block. 2413 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 2414 ++Line->Level; 2415 2416 nextToken(); 2417 parseStructuralElement(); 2418 } 2419 2420 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 2421 nextToken(); 2422 unsigned OldLineLevel = Line->Level; 2423 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 2424 --Line->Level; 2425 if (LeftAlignLabel) 2426 Line->Level = 0; 2427 2428 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 2429 FormatTok->Tok.is(tok::l_brace)) { 2430 2431 CompoundStatementIndenter Indenter(this, Line->Level, 2432 Style.BraceWrapping.AfterCaseLabel, 2433 Style.BraceWrapping.IndentBraces); 2434 parseBlock(); 2435 if (FormatTok->Tok.is(tok::kw_break)) { 2436 if (Style.BraceWrapping.AfterControlStatement == 2437 FormatStyle::BWACS_Always) { 2438 addUnwrappedLine(); 2439 if (!Style.IndentCaseBlocks && 2440 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 2441 Line->Level++; 2442 } 2443 } 2444 parseStructuralElement(); 2445 } 2446 addUnwrappedLine(); 2447 } else { 2448 if (FormatTok->is(tok::semi)) 2449 nextToken(); 2450 addUnwrappedLine(); 2451 } 2452 Line->Level = OldLineLevel; 2453 if (FormatTok->isNot(tok::l_brace)) { 2454 parseStructuralElement(); 2455 addUnwrappedLine(); 2456 } 2457 } 2458 2459 void UnwrappedLineParser::parseCaseLabel() { 2460 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 2461 2462 // FIXME: fix handling of complex expressions here. 2463 do { 2464 nextToken(); 2465 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 2466 parseLabel(); 2467 } 2468 2469 void UnwrappedLineParser::parseSwitch() { 2470 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 2471 nextToken(); 2472 if (FormatTok->Tok.is(tok::l_paren)) 2473 parseParens(); 2474 if (FormatTok->Tok.is(tok::l_brace)) { 2475 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2476 parseBlock(); 2477 addUnwrappedLine(); 2478 } else { 2479 addUnwrappedLine(); 2480 ++Line->Level; 2481 parseStructuralElement(); 2482 --Line->Level; 2483 } 2484 } 2485 2486 void UnwrappedLineParser::parseAccessSpecifier() { 2487 nextToken(); 2488 // Understand Qt's slots. 2489 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 2490 nextToken(); 2491 // Otherwise, we don't know what it is, and we'd better keep the next token. 2492 if (FormatTok->Tok.is(tok::colon)) 2493 nextToken(); 2494 addUnwrappedLine(); 2495 } 2496 2497 void UnwrappedLineParser::parseConcept() { 2498 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected"); 2499 nextToken(); 2500 if (!FormatTok->Tok.is(tok::identifier)) 2501 return; 2502 nextToken(); 2503 if (!FormatTok->Tok.is(tok::equal)) 2504 return; 2505 nextToken(); 2506 if (FormatTok->Tok.is(tok::kw_requires)) { 2507 nextToken(); 2508 parseRequiresExpression(Line->Level); 2509 } else { 2510 parseConstraintExpression(Line->Level); 2511 } 2512 } 2513 2514 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) { 2515 // requires (R range) 2516 if (FormatTok->Tok.is(tok::l_paren)) { 2517 parseParens(); 2518 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2519 addUnwrappedLine(); 2520 --Line->Level; 2521 } 2522 } 2523 2524 if (FormatTok->Tok.is(tok::l_brace)) { 2525 if (Style.BraceWrapping.AfterFunction) 2526 addUnwrappedLine(); 2527 FormatTok->setType(TT_FunctionLBrace); 2528 parseBlock(); 2529 addUnwrappedLine(); 2530 } else { 2531 parseConstraintExpression(OriginalLevel); 2532 } 2533 } 2534 2535 void UnwrappedLineParser::parseConstraintExpression( 2536 unsigned int OriginalLevel) { 2537 // requires Id<T> && Id<T> || Id<T> 2538 while ( 2539 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) { 2540 nextToken(); 2541 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less, 2542 tok::greater, tok::comma, tok::ellipsis)) { 2543 if (FormatTok->Tok.is(tok::less)) { 2544 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2545 /*ClosingBraceKind=*/tok::greater); 2546 continue; 2547 } 2548 nextToken(); 2549 } 2550 if (FormatTok->Tok.is(tok::kw_requires)) { 2551 parseRequiresExpression(OriginalLevel); 2552 } 2553 if (FormatTok->Tok.is(tok::less)) { 2554 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false, 2555 /*ClosingBraceKind=*/tok::greater); 2556 } 2557 2558 if (FormatTok->Tok.is(tok::l_paren)) { 2559 parseParens(); 2560 } 2561 if (FormatTok->Tok.is(tok::l_brace)) { 2562 if (Style.BraceWrapping.AfterFunction) 2563 addUnwrappedLine(); 2564 FormatTok->setType(TT_FunctionLBrace); 2565 parseBlock(); 2566 } 2567 if (FormatTok->Tok.is(tok::semi)) { 2568 // Eat any trailing semi. 2569 nextToken(); 2570 addUnwrappedLine(); 2571 } 2572 if (FormatTok->Tok.is(tok::colon)) { 2573 return; 2574 } 2575 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) { 2576 if (FormatTok->Previous && 2577 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires, 2578 tok::coloncolon)) { 2579 addUnwrappedLine(); 2580 } 2581 if (Style.IndentRequires && OriginalLevel != Line->Level) { 2582 --Line->Level; 2583 } 2584 break; 2585 } else { 2586 FormatTok->setType(TT_ConstraintJunctions); 2587 } 2588 2589 nextToken(); 2590 } 2591 } 2592 2593 void UnwrappedLineParser::parseRequires() { 2594 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected"); 2595 2596 unsigned OriginalLevel = Line->Level; 2597 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) { 2598 addUnwrappedLine(); 2599 if (Style.IndentRequires) { 2600 Line->Level++; 2601 } 2602 } 2603 nextToken(); 2604 2605 parseRequiresExpression(OriginalLevel); 2606 } 2607 2608 bool UnwrappedLineParser::parseEnum() { 2609 // Won't be 'enum' for NS_ENUMs. 2610 if (FormatTok->Tok.is(tok::kw_enum)) 2611 nextToken(); 2612 2613 const FormatToken &InitialToken = *FormatTok; 2614 2615 // In TypeScript, "enum" can also be used as property name, e.g. in interface 2616 // declarations. An "enum" keyword followed by a colon would be a syntax 2617 // error and thus assume it is just an identifier. 2618 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 2619 return false; 2620 2621 // In protobuf, "enum" can be used as a field name. 2622 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 2623 return false; 2624 2625 // Eat up enum class ... 2626 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 2627 nextToken(); 2628 2629 while (FormatTok->Tok.getIdentifierInfo() || 2630 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 2631 tok::greater, tok::comma, tok::question)) { 2632 nextToken(); 2633 // We can have macros or attributes in between 'enum' and the enum name. 2634 if (FormatTok->is(tok::l_paren)) 2635 parseParens(); 2636 if (FormatTok->is(tok::identifier)) { 2637 nextToken(); 2638 // If there are two identifiers in a row, this is likely an elaborate 2639 // return type. In Java, this can be "implements", etc. 2640 if (Style.isCpp() && FormatTok->is(tok::identifier)) 2641 return false; 2642 } 2643 } 2644 2645 // Just a declaration or something is wrong. 2646 if (FormatTok->isNot(tok::l_brace)) 2647 return true; 2648 FormatTok->setBlockKind(BK_Block); 2649 2650 if (Style.Language == FormatStyle::LK_Java) { 2651 // Java enums are different. 2652 parseJavaEnumBody(); 2653 return true; 2654 } 2655 if (Style.Language == FormatStyle::LK_Proto) { 2656 parseBlock(/*MustBeDeclaration=*/true); 2657 return true; 2658 } 2659 2660 if (!Style.AllowShortEnumsOnASingleLine && 2661 ShouldBreakBeforeBrace(Style, InitialToken)) 2662 addUnwrappedLine(); 2663 // Parse enum body. 2664 nextToken(); 2665 if (!Style.AllowShortEnumsOnASingleLine) { 2666 addUnwrappedLine(); 2667 Line->Level += 1; 2668 } 2669 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true, 2670 /*IsEnum=*/true); 2671 if (!Style.AllowShortEnumsOnASingleLine) 2672 Line->Level -= 1; 2673 if (HasError) { 2674 if (FormatTok->is(tok::semi)) 2675 nextToken(); 2676 addUnwrappedLine(); 2677 } 2678 return true; 2679 2680 // There is no addUnwrappedLine() here so that we fall through to parsing a 2681 // structural element afterwards. Thus, in "enum A {} n, m;", 2682 // "} n, m;" will end up in one unwrapped line. 2683 } 2684 2685 bool UnwrappedLineParser::parseStructLike() { 2686 // parseRecord falls through and does not yet add an unwrapped line as a 2687 // record declaration or definition can start a structural element. 2688 parseRecord(); 2689 // This does not apply to Java, JavaScript and C#. 2690 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 2691 Style.isCSharp()) { 2692 if (FormatTok->is(tok::semi)) 2693 nextToken(); 2694 addUnwrappedLine(); 2695 return true; 2696 } 2697 return false; 2698 } 2699 2700 namespace { 2701 // A class used to set and restore the Token position when peeking 2702 // ahead in the token source. 2703 class ScopedTokenPosition { 2704 unsigned StoredPosition; 2705 FormatTokenSource *Tokens; 2706 2707 public: 2708 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 2709 assert(Tokens && "Tokens expected to not be null"); 2710 StoredPosition = Tokens->getPosition(); 2711 } 2712 2713 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 2714 }; 2715 } // namespace 2716 2717 // Look to see if we have [[ by looking ahead, if 2718 // its not then rewind to the original position. 2719 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 2720 ScopedTokenPosition AutoPosition(Tokens); 2721 FormatToken *Tok = Tokens->getNextToken(); 2722 // We already read the first [ check for the second. 2723 if (!Tok->is(tok::l_square)) { 2724 return false; 2725 } 2726 // Double check that the attribute is just something 2727 // fairly simple. 2728 while (Tok->isNot(tok::eof)) { 2729 if (Tok->is(tok::r_square)) { 2730 break; 2731 } 2732 Tok = Tokens->getNextToken(); 2733 } 2734 if (Tok->is(tok::eof)) 2735 return false; 2736 Tok = Tokens->getNextToken(); 2737 if (!Tok->is(tok::r_square)) { 2738 return false; 2739 } 2740 Tok = Tokens->getNextToken(); 2741 if (Tok->is(tok::semi)) { 2742 return false; 2743 } 2744 return true; 2745 } 2746 2747 void UnwrappedLineParser::parseJavaEnumBody() { 2748 // Determine whether the enum is simple, i.e. does not have a semicolon or 2749 // constants with class bodies. Simple enums can be formatted like braced 2750 // lists, contracted to a single line, etc. 2751 unsigned StoredPosition = Tokens->getPosition(); 2752 bool IsSimple = true; 2753 FormatToken *Tok = Tokens->getNextToken(); 2754 while (!Tok->is(tok::eof)) { 2755 if (Tok->is(tok::r_brace)) 2756 break; 2757 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 2758 IsSimple = false; 2759 break; 2760 } 2761 // FIXME: This will also mark enums with braces in the arguments to enum 2762 // constants as "not simple". This is probably fine in practice, though. 2763 Tok = Tokens->getNextToken(); 2764 } 2765 FormatTok = Tokens->setPosition(StoredPosition); 2766 2767 if (IsSimple) { 2768 nextToken(); 2769 parseBracedList(); 2770 addUnwrappedLine(); 2771 return; 2772 } 2773 2774 // Parse the body of a more complex enum. 2775 // First add a line for everything up to the "{". 2776 nextToken(); 2777 addUnwrappedLine(); 2778 ++Line->Level; 2779 2780 // Parse the enum constants. 2781 while (FormatTok) { 2782 if (FormatTok->is(tok::l_brace)) { 2783 // Parse the constant's class body. 2784 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 2785 /*MunchSemi=*/false); 2786 } else if (FormatTok->is(tok::l_paren)) { 2787 parseParens(); 2788 } else if (FormatTok->is(tok::comma)) { 2789 nextToken(); 2790 addUnwrappedLine(); 2791 } else if (FormatTok->is(tok::semi)) { 2792 nextToken(); 2793 addUnwrappedLine(); 2794 break; 2795 } else if (FormatTok->is(tok::r_brace)) { 2796 addUnwrappedLine(); 2797 break; 2798 } else { 2799 nextToken(); 2800 } 2801 } 2802 2803 // Parse the class body after the enum's ";" if any. 2804 parseLevel(/*HasOpeningBrace=*/true); 2805 nextToken(); 2806 --Line->Level; 2807 addUnwrappedLine(); 2808 } 2809 2810 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 2811 const FormatToken &InitialToken = *FormatTok; 2812 nextToken(); 2813 2814 // The actual identifier can be a nested name specifier, and in macros 2815 // it is often token-pasted. 2816 // An [[attribute]] can be before the identifier. 2817 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 2818 tok::kw___attribute, tok::kw___declspec, 2819 tok::kw_alignas, tok::l_square, tok::r_square) || 2820 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 2821 FormatTok->isOneOf(tok::period, tok::comma))) { 2822 if (Style.isJavaScript() && 2823 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 2824 // JavaScript/TypeScript supports inline object types in 2825 // extends/implements positions: 2826 // class Foo implements {bar: number} { } 2827 nextToken(); 2828 if (FormatTok->is(tok::l_brace)) { 2829 tryToParseBracedList(); 2830 continue; 2831 } 2832 } 2833 bool IsNonMacroIdentifier = 2834 FormatTok->is(tok::identifier) && 2835 FormatTok->TokenText != FormatTok->TokenText.upper(); 2836 nextToken(); 2837 // We can have macros or attributes in between 'class' and the class name. 2838 if (!IsNonMacroIdentifier) { 2839 if (FormatTok->Tok.is(tok::l_paren)) { 2840 parseParens(); 2841 } else if (FormatTok->is(TT_AttributeSquare)) { 2842 parseSquare(); 2843 // Consume the closing TT_AttributeSquare. 2844 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare)) 2845 nextToken(); 2846 } 2847 } 2848 } 2849 2850 // Note that parsing away template declarations here leads to incorrectly 2851 // accepting function declarations as record declarations. 2852 // In general, we cannot solve this problem. Consider: 2853 // class A<int> B() {} 2854 // which can be a function definition or a class definition when B() is a 2855 // macro. If we find enough real-world cases where this is a problem, we 2856 // can parse for the 'template' keyword in the beginning of the statement, 2857 // and thus rule out the record production in case there is no template 2858 // (this would still leave us with an ambiguity between template function 2859 // and class declarations). 2860 if (FormatTok->isOneOf(tok::colon, tok::less)) { 2861 while (!eof()) { 2862 if (FormatTok->is(tok::l_brace)) { 2863 calculateBraceTypes(/*ExpectClassBody=*/true); 2864 if (!tryToParseBracedList()) 2865 break; 2866 } 2867 if (FormatTok->Tok.is(tok::semi)) 2868 return; 2869 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 2870 addUnwrappedLine(); 2871 nextToken(); 2872 parseCSharpGenericTypeConstraint(); 2873 break; 2874 } 2875 nextToken(); 2876 } 2877 } 2878 if (FormatTok->Tok.is(tok::l_brace)) { 2879 if (ParseAsExpr) { 2880 parseChildBlock(); 2881 } else { 2882 if (ShouldBreakBeforeBrace(Style, InitialToken)) 2883 addUnwrappedLine(); 2884 2885 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 2886 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 2887 } 2888 } 2889 // There is no addUnwrappedLine() here so that we fall through to parsing a 2890 // structural element afterwards. Thus, in "class A {} n, m;", 2891 // "} n, m;" will end up in one unwrapped line. 2892 } 2893 2894 void UnwrappedLineParser::parseObjCMethod() { 2895 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && 2896 "'(' or identifier expected."); 2897 do { 2898 if (FormatTok->Tok.is(tok::semi)) { 2899 nextToken(); 2900 addUnwrappedLine(); 2901 return; 2902 } else if (FormatTok->Tok.is(tok::l_brace)) { 2903 if (Style.BraceWrapping.AfterFunction) 2904 addUnwrappedLine(); 2905 parseBlock(); 2906 addUnwrappedLine(); 2907 return; 2908 } else { 2909 nextToken(); 2910 } 2911 } while (!eof()); 2912 } 2913 2914 void UnwrappedLineParser::parseObjCProtocolList() { 2915 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 2916 do { 2917 nextToken(); 2918 // Early exit in case someone forgot a close angle. 2919 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 2920 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 2921 return; 2922 } while (!eof() && FormatTok->Tok.isNot(tok::greater)); 2923 nextToken(); // Skip '>'. 2924 } 2925 2926 void UnwrappedLineParser::parseObjCUntilAtEnd() { 2927 do { 2928 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 2929 nextToken(); 2930 addUnwrappedLine(); 2931 break; 2932 } 2933 if (FormatTok->is(tok::l_brace)) { 2934 parseBlock(); 2935 // In ObjC interfaces, nothing should be following the "}". 2936 addUnwrappedLine(); 2937 } else if (FormatTok->is(tok::r_brace)) { 2938 // Ignore stray "}". parseStructuralElement doesn't consume them. 2939 nextToken(); 2940 addUnwrappedLine(); 2941 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 2942 nextToken(); 2943 parseObjCMethod(); 2944 } else { 2945 parseStructuralElement(); 2946 } 2947 } while (!eof()); 2948 } 2949 2950 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 2951 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 2952 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 2953 nextToken(); 2954 nextToken(); // interface name 2955 2956 // @interface can be followed by a lightweight generic 2957 // specialization list, then either a base class or a category. 2958 if (FormatTok->Tok.is(tok::less)) { 2959 parseObjCLightweightGenerics(); 2960 } 2961 if (FormatTok->Tok.is(tok::colon)) { 2962 nextToken(); 2963 nextToken(); // base class name 2964 // The base class can also have lightweight generics applied to it. 2965 if (FormatTok->Tok.is(tok::less)) { 2966 parseObjCLightweightGenerics(); 2967 } 2968 } else if (FormatTok->Tok.is(tok::l_paren)) 2969 // Skip category, if present. 2970 parseParens(); 2971 2972 if (FormatTok->Tok.is(tok::less)) 2973 parseObjCProtocolList(); 2974 2975 if (FormatTok->Tok.is(tok::l_brace)) { 2976 if (Style.BraceWrapping.AfterObjCDeclaration) 2977 addUnwrappedLine(); 2978 parseBlock(/*MustBeDeclaration=*/true); 2979 } 2980 2981 // With instance variables, this puts '}' on its own line. Without instance 2982 // variables, this ends the @interface line. 2983 addUnwrappedLine(); 2984 2985 parseObjCUntilAtEnd(); 2986 } 2987 2988 void UnwrappedLineParser::parseObjCLightweightGenerics() { 2989 assert(FormatTok->Tok.is(tok::less)); 2990 // Unlike protocol lists, generic parameterizations support 2991 // nested angles: 2992 // 2993 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 2994 // NSObject <NSCopying, NSSecureCoding> 2995 // 2996 // so we need to count how many open angles we have left. 2997 unsigned NumOpenAngles = 1; 2998 do { 2999 nextToken(); 3000 // Early exit in case someone forgot a close angle. 3001 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 3002 FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) 3003 break; 3004 if (FormatTok->Tok.is(tok::less)) 3005 ++NumOpenAngles; 3006 else if (FormatTok->Tok.is(tok::greater)) { 3007 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 3008 --NumOpenAngles; 3009 } 3010 } while (!eof() && NumOpenAngles != 0); 3011 nextToken(); // Skip '>'. 3012 } 3013 3014 // Returns true for the declaration/definition form of @protocol, 3015 // false for the expression form. 3016 bool UnwrappedLineParser::parseObjCProtocol() { 3017 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 3018 nextToken(); 3019 3020 if (FormatTok->is(tok::l_paren)) 3021 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 3022 return false; 3023 3024 // The definition/declaration form, 3025 // @protocol Foo 3026 // - (int)someMethod; 3027 // @end 3028 3029 nextToken(); // protocol name 3030 3031 if (FormatTok->Tok.is(tok::less)) 3032 parseObjCProtocolList(); 3033 3034 // Check for protocol declaration. 3035 if (FormatTok->Tok.is(tok::semi)) { 3036 nextToken(); 3037 addUnwrappedLine(); 3038 return true; 3039 } 3040 3041 addUnwrappedLine(); 3042 parseObjCUntilAtEnd(); 3043 return true; 3044 } 3045 3046 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 3047 bool IsImport = FormatTok->is(Keywords.kw_import); 3048 assert(IsImport || FormatTok->is(tok::kw_export)); 3049 nextToken(); 3050 3051 // Consume the "default" in "export default class/function". 3052 if (FormatTok->is(tok::kw_default)) 3053 nextToken(); 3054 3055 // Consume "async function", "function" and "default function", so that these 3056 // get parsed as free-standing JS functions, i.e. do not require a trailing 3057 // semicolon. 3058 if (FormatTok->is(Keywords.kw_async)) 3059 nextToken(); 3060 if (FormatTok->is(Keywords.kw_function)) { 3061 nextToken(); 3062 return; 3063 } 3064 3065 // For imports, `export *`, `export {...}`, consume the rest of the line up 3066 // to the terminating `;`. For everything else, just return and continue 3067 // parsing the structural element, i.e. the declaration or expression for 3068 // `export default`. 3069 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 3070 !FormatTok->isStringLiteral()) 3071 return; 3072 3073 while (!eof()) { 3074 if (FormatTok->is(tok::semi)) 3075 return; 3076 if (Line->Tokens.empty()) { 3077 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 3078 // import statement should terminate. 3079 return; 3080 } 3081 if (FormatTok->is(tok::l_brace)) { 3082 FormatTok->setBlockKind(BK_Block); 3083 nextToken(); 3084 parseBracedList(); 3085 } else { 3086 nextToken(); 3087 } 3088 } 3089 } 3090 3091 void UnwrappedLineParser::parseStatementMacro() { 3092 nextToken(); 3093 if (FormatTok->is(tok::l_paren)) 3094 parseParens(); 3095 if (FormatTok->is(tok::semi)) 3096 nextToken(); 3097 addUnwrappedLine(); 3098 } 3099 3100 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 3101 StringRef Prefix = "") { 3102 llvm::dbgs() << Prefix << "Line(" << Line.Level 3103 << ", FSC=" << Line.FirstStartColumn << ")" 3104 << (Line.InPPDirective ? " MACRO" : "") << ": "; 3105 for (const auto &Node : Line.Tokens) { 3106 llvm::dbgs() << Node.Tok->Tok.getName() << "[" 3107 << "T=" << static_cast<unsigned>(Node.Tok->getType()) 3108 << ", OC=" << Node.Tok->OriginalColumn << "] "; 3109 } 3110 for (const auto &Node : Line.Tokens) 3111 for (const auto &ChildNode : Node.Children) 3112 printDebugInfo(ChildNode, "\nChild: "); 3113 3114 llvm::dbgs() << "\n"; 3115 } 3116 3117 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 3118 if (Line->Tokens.empty()) 3119 return; 3120 LLVM_DEBUG({ 3121 if (CurrentLines == &Lines) 3122 printDebugInfo(*Line); 3123 }); 3124 3125 // If this line closes a block when in Whitesmiths mode, remember that 3126 // information so that the level can be decreased after the line is added. 3127 // This has to happen after the addition of the line since the line itself 3128 // needs to be indented. 3129 bool ClosesWhitesmithsBlock = 3130 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 3131 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 3132 3133 CurrentLines->push_back(std::move(*Line)); 3134 Line->Tokens.clear(); 3135 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 3136 Line->FirstStartColumn = 0; 3137 3138 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 3139 --Line->Level; 3140 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 3141 CurrentLines->append( 3142 std::make_move_iterator(PreprocessorDirectives.begin()), 3143 std::make_move_iterator(PreprocessorDirectives.end())); 3144 PreprocessorDirectives.clear(); 3145 } 3146 // Disconnect the current token from the last token on the previous line. 3147 FormatTok->Previous = nullptr; 3148 } 3149 3150 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 3151 3152 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 3153 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 3154 FormatTok.NewlinesBefore > 0; 3155 } 3156 3157 // Checks if \p FormatTok is a line comment that continues the line comment 3158 // section on \p Line. 3159 static bool 3160 continuesLineCommentSection(const FormatToken &FormatTok, 3161 const UnwrappedLine &Line, 3162 const llvm::Regex &CommentPragmasRegex) { 3163 if (Line.Tokens.empty()) 3164 return false; 3165 3166 StringRef IndentContent = FormatTok.TokenText; 3167 if (FormatTok.TokenText.startswith("//") || 3168 FormatTok.TokenText.startswith("/*")) 3169 IndentContent = FormatTok.TokenText.substr(2); 3170 if (CommentPragmasRegex.match(IndentContent)) 3171 return false; 3172 3173 // If Line starts with a line comment, then FormatTok continues the comment 3174 // section if its original column is greater or equal to the original start 3175 // column of the line. 3176 // 3177 // Define the min column token of a line as follows: if a line ends in '{' or 3178 // contains a '{' followed by a line comment, then the min column token is 3179 // that '{'. Otherwise, the min column token of the line is the first token of 3180 // the line. 3181 // 3182 // If Line starts with a token other than a line comment, then FormatTok 3183 // continues the comment section if its original column is greater than the 3184 // original start column of the min column token of the line. 3185 // 3186 // For example, the second line comment continues the first in these cases: 3187 // 3188 // // first line 3189 // // second line 3190 // 3191 // and: 3192 // 3193 // // first line 3194 // // second line 3195 // 3196 // and: 3197 // 3198 // int i; // first line 3199 // // second line 3200 // 3201 // and: 3202 // 3203 // do { // first line 3204 // // second line 3205 // int i; 3206 // } while (true); 3207 // 3208 // and: 3209 // 3210 // enum { 3211 // a, // first line 3212 // // second line 3213 // b 3214 // }; 3215 // 3216 // The second line comment doesn't continue the first in these cases: 3217 // 3218 // // first line 3219 // // second line 3220 // 3221 // and: 3222 // 3223 // int i; // first line 3224 // // second line 3225 // 3226 // and: 3227 // 3228 // do { // first line 3229 // // second line 3230 // int i; 3231 // } while (true); 3232 // 3233 // and: 3234 // 3235 // enum { 3236 // a, // first line 3237 // // second line 3238 // }; 3239 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 3240 3241 // Scan for '{//'. If found, use the column of '{' as a min column for line 3242 // comment section continuation. 3243 const FormatToken *PreviousToken = nullptr; 3244 for (const UnwrappedLineNode &Node : Line.Tokens) { 3245 if (PreviousToken && PreviousToken->is(tok::l_brace) && 3246 isLineComment(*Node.Tok)) { 3247 MinColumnToken = PreviousToken; 3248 break; 3249 } 3250 PreviousToken = Node.Tok; 3251 3252 // Grab the last newline preceding a token in this unwrapped line. 3253 if (Node.Tok->NewlinesBefore > 0) { 3254 MinColumnToken = Node.Tok; 3255 } 3256 } 3257 if (PreviousToken && PreviousToken->is(tok::l_brace)) { 3258 MinColumnToken = PreviousToken; 3259 } 3260 3261 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 3262 MinColumnToken); 3263 } 3264 3265 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 3266 bool JustComments = Line->Tokens.empty(); 3267 for (SmallVectorImpl<FormatToken *>::const_iterator 3268 I = CommentsBeforeNextToken.begin(), 3269 E = CommentsBeforeNextToken.end(); 3270 I != E; ++I) { 3271 // Line comments that belong to the same line comment section are put on the 3272 // same line since later we might want to reflow content between them. 3273 // Additional fine-grained breaking of line comment sections is controlled 3274 // by the class BreakableLineCommentSection in case it is desirable to keep 3275 // several line comment sections in the same unwrapped line. 3276 // 3277 // FIXME: Consider putting separate line comment sections as children to the 3278 // unwrapped line instead. 3279 (*I)->ContinuesLineCommentSection = 3280 continuesLineCommentSection(**I, *Line, CommentPragmasRegex); 3281 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) 3282 addUnwrappedLine(); 3283 pushToken(*I); 3284 } 3285 if (NewlineBeforeNext && JustComments) 3286 addUnwrappedLine(); 3287 CommentsBeforeNextToken.clear(); 3288 } 3289 3290 void UnwrappedLineParser::nextToken(int LevelDifference) { 3291 if (eof()) 3292 return; 3293 flushComments(isOnNewLine(*FormatTok)); 3294 pushToken(FormatTok); 3295 FormatToken *Previous = FormatTok; 3296 if (!Style.isJavaScript()) 3297 readToken(LevelDifference); 3298 else 3299 readTokenWithJavaScriptASI(); 3300 FormatTok->Previous = Previous; 3301 } 3302 3303 void UnwrappedLineParser::distributeComments( 3304 const SmallVectorImpl<FormatToken *> &Comments, 3305 const FormatToken *NextTok) { 3306 // Whether or not a line comment token continues a line is controlled by 3307 // the method continuesLineCommentSection, with the following caveat: 3308 // 3309 // Define a trail of Comments to be a nonempty proper postfix of Comments such 3310 // that each comment line from the trail is aligned with the next token, if 3311 // the next token exists. If a trail exists, the beginning of the maximal 3312 // trail is marked as a start of a new comment section. 3313 // 3314 // For example in this code: 3315 // 3316 // int a; // line about a 3317 // // line 1 about b 3318 // // line 2 about b 3319 // int b; 3320 // 3321 // the two lines about b form a maximal trail, so there are two sections, the 3322 // first one consisting of the single comment "// line about a" and the 3323 // second one consisting of the next two comments. 3324 if (Comments.empty()) 3325 return; 3326 bool ShouldPushCommentsInCurrentLine = true; 3327 bool HasTrailAlignedWithNextToken = false; 3328 unsigned StartOfTrailAlignedWithNextToken = 0; 3329 if (NextTok) { 3330 // We are skipping the first element intentionally. 3331 for (unsigned i = Comments.size() - 1; i > 0; --i) { 3332 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 3333 HasTrailAlignedWithNextToken = true; 3334 StartOfTrailAlignedWithNextToken = i; 3335 } 3336 } 3337 } 3338 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 3339 FormatToken *FormatTok = Comments[i]; 3340 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 3341 FormatTok->ContinuesLineCommentSection = false; 3342 } else { 3343 FormatTok->ContinuesLineCommentSection = 3344 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 3345 } 3346 if (!FormatTok->ContinuesLineCommentSection && 3347 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 3348 ShouldPushCommentsInCurrentLine = false; 3349 } 3350 if (ShouldPushCommentsInCurrentLine) { 3351 pushToken(FormatTok); 3352 } else { 3353 CommentsBeforeNextToken.push_back(FormatTok); 3354 } 3355 } 3356 } 3357 3358 void UnwrappedLineParser::readToken(int LevelDifference) { 3359 SmallVector<FormatToken *, 1> Comments; 3360 do { 3361 FormatTok = Tokens->getNextToken(); 3362 assert(FormatTok); 3363 while (FormatTok->getType() == TT_ConflictStart || 3364 FormatTok->getType() == TT_ConflictEnd || 3365 FormatTok->getType() == TT_ConflictAlternative) { 3366 if (FormatTok->getType() == TT_ConflictStart) { 3367 conditionalCompilationStart(/*Unreachable=*/false); 3368 } else if (FormatTok->getType() == TT_ConflictAlternative) { 3369 conditionalCompilationAlternative(); 3370 } else if (FormatTok->getType() == TT_ConflictEnd) { 3371 conditionalCompilationEnd(); 3372 } 3373 FormatTok = Tokens->getNextToken(); 3374 FormatTok->MustBreakBefore = true; 3375 } 3376 3377 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 3378 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 3379 distributeComments(Comments, FormatTok); 3380 Comments.clear(); 3381 // If there is an unfinished unwrapped line, we flush the preprocessor 3382 // directives only after that unwrapped line was finished later. 3383 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 3384 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 3385 assert((LevelDifference >= 0 || 3386 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 3387 "LevelDifference makes Line->Level negative"); 3388 Line->Level += LevelDifference; 3389 // Comments stored before the preprocessor directive need to be output 3390 // before the preprocessor directive, at the same level as the 3391 // preprocessor directive, as we consider them to apply to the directive. 3392 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 3393 PPBranchLevel > 0) 3394 Line->Level += PPBranchLevel; 3395 flushComments(isOnNewLine(*FormatTok)); 3396 parsePPDirective(); 3397 } 3398 3399 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 3400 !Line->InPPDirective) { 3401 continue; 3402 } 3403 3404 if (!FormatTok->Tok.is(tok::comment)) { 3405 distributeComments(Comments, FormatTok); 3406 Comments.clear(); 3407 return; 3408 } 3409 3410 Comments.push_back(FormatTok); 3411 } while (!eof()); 3412 3413 distributeComments(Comments, nullptr); 3414 Comments.clear(); 3415 } 3416 3417 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 3418 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 3419 if (MustBreakBeforeNextToken) { 3420 Line->Tokens.back().Tok->MustBreakBefore = true; 3421 MustBreakBeforeNextToken = false; 3422 } 3423 } 3424 3425 } // end namespace format 3426 } // end namespace clang 3427