1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file contains the implementation of the UnwrappedLineParser, 11 /// which turns a stream of tokens into UnwrappedLines. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "UnwrappedLineParser.h" 16 #include "FormatToken.h" 17 #include "FormatTokenLexer.h" 18 #include "FormatTokenSource.h" 19 #include "Macros.h" 20 #include "TokenAnnotator.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/StringRef.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/raw_os_ostream.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <algorithm> 29 #include <utility> 30 31 #define DEBUG_TYPE "format-parser" 32 33 namespace clang { 34 namespace format { 35 36 namespace { 37 38 void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line, 39 StringRef Prefix = "", bool PrintText = false) { 40 OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn 41 << ")" << (Line.InPPDirective ? 
" MACRO" : "") << ": "; 42 bool NewLine = false; 43 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 44 E = Line.Tokens.end(); 45 I != E; ++I) { 46 if (NewLine) { 47 OS << Prefix; 48 NewLine = false; 49 } 50 OS << I->Tok->Tok.getName() << "[" 51 << "T=" << (unsigned)I->Tok->getType() 52 << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText 53 << "\"] "; 54 for (const auto *CI = I->Children.begin(), *CE = I->Children.end(); 55 CI != CE; ++CI) { 56 OS << "\n"; 57 printLine(OS, *CI, (Prefix + " ").str()); 58 NewLine = true; 59 } 60 } 61 if (!NewLine) 62 OS << "\n"; 63 } 64 65 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) { 66 printLine(llvm::dbgs(), Line); 67 } 68 69 class ScopedDeclarationState { 70 public: 71 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack, 72 bool MustBeDeclaration) 73 : Line(Line), Stack(Stack) { 74 Line.MustBeDeclaration = MustBeDeclaration; 75 Stack.push_back(MustBeDeclaration); 76 } 77 ~ScopedDeclarationState() { 78 Stack.pop_back(); 79 if (!Stack.empty()) 80 Line.MustBeDeclaration = Stack.back(); 81 else 82 Line.MustBeDeclaration = true; 83 } 84 85 private: 86 UnwrappedLine &Line; 87 llvm::BitVector &Stack; 88 }; 89 90 } // end anonymous namespace 91 92 std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) { 93 llvm::raw_os_ostream OS(Stream); 94 printLine(OS, Line); 95 return Stream; 96 } 97 98 class ScopedLineState { 99 public: 100 ScopedLineState(UnwrappedLineParser &Parser, 101 bool SwitchToPreprocessorLines = false) 102 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 103 if (SwitchToPreprocessorLines) 104 Parser.CurrentLines = &Parser.PreprocessorDirectives; 105 else if (!Parser.Line->Tokens.empty()) 106 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 107 PreBlockLine = std::move(Parser.Line); 108 Parser.Line = std::make_unique<UnwrappedLine>(); 109 Parser.Line->Level = PreBlockLine->Level; 110 
Parser.Line->PPLevel = PreBlockLine->PPLevel; 111 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 112 Parser.Line->InMacroBody = PreBlockLine->InMacroBody; 113 Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel; 114 } 115 116 ~ScopedLineState() { 117 if (!Parser.Line->Tokens.empty()) 118 Parser.addUnwrappedLine(); 119 assert(Parser.Line->Tokens.empty()); 120 Parser.Line = std::move(PreBlockLine); 121 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 122 Parser.MustBreakBeforeNextToken = true; 123 Parser.CurrentLines = OriginalLines; 124 } 125 126 private: 127 UnwrappedLineParser &Parser; 128 129 std::unique_ptr<UnwrappedLine> PreBlockLine; 130 SmallVectorImpl<UnwrappedLine> *OriginalLines; 131 }; 132 133 class CompoundStatementIndenter { 134 public: 135 CompoundStatementIndenter(UnwrappedLineParser *Parser, 136 const FormatStyle &Style, unsigned &LineLevel) 137 : CompoundStatementIndenter(Parser, LineLevel, 138 Style.BraceWrapping.AfterControlStatement, 139 Style.BraceWrapping.IndentBraces) {} 140 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel, 141 bool WrapBrace, bool IndentBrace) 142 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 143 if (WrapBrace) 144 Parser->addUnwrappedLine(); 145 if (IndentBrace) 146 ++LineLevel; 147 } 148 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 149 150 private: 151 unsigned &LineLevel; 152 unsigned OldLineLevel; 153 }; 154 155 UnwrappedLineParser::UnwrappedLineParser( 156 SourceManager &SourceMgr, const FormatStyle &Style, 157 const AdditionalKeywords &Keywords, unsigned FirstStartColumn, 158 ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, 159 llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, 160 IdentifierTable &IdentTable) 161 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 162 CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()), 163 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords), 164 
CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), 165 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), 166 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None 167 ? IG_Rejected 168 : IG_Inited), 169 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), 170 Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) { 171 assert(IsCpp == LangOpts.CXXOperatorNames); 172 } 173 174 void UnwrappedLineParser::reset() { 175 PPBranchLevel = -1; 176 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None 177 ? IG_Rejected 178 : IG_Inited; 179 IncludeGuardToken = nullptr; 180 Line.reset(new UnwrappedLine); 181 CommentsBeforeNextToken.clear(); 182 FormatTok = nullptr; 183 MustBreakBeforeNextToken = false; 184 IsDecltypeAutoFunction = false; 185 PreprocessorDirectives.clear(); 186 CurrentLines = &Lines; 187 DeclarationScopeStack.clear(); 188 NestedTooDeep.clear(); 189 NestedLambdas.clear(); 190 PPStack.clear(); 191 Line->FirstStartColumn = FirstStartColumn; 192 193 if (!Unexpanded.empty()) 194 for (FormatToken *Token : AllTokens) 195 Token->MacroCtx.reset(); 196 CurrentExpandedLines.clear(); 197 ExpandedLines.clear(); 198 Unexpanded.clear(); 199 InExpansion = false; 200 Reconstruct.reset(); 201 } 202 203 void UnwrappedLineParser::parse() { 204 IndexedTokenSource TokenSource(AllTokens); 205 Line->FirstStartColumn = FirstStartColumn; 206 do { 207 LLVM_DEBUG(llvm::dbgs() << "----\n"); 208 reset(); 209 Tokens = &TokenSource; 210 TokenSource.reset(); 211 212 readToken(); 213 parseFile(); 214 215 // If we found an include guard then all preprocessor directives (other than 216 // the guard) are over-indented by one. 217 if (IncludeGuard == IG_Found) { 218 for (auto &Line : Lines) 219 if (Line.InPPDirective && Line.Level > 0) 220 --Line.Level; 221 } 222 223 // Create line with eof token. 
224 assert(eof()); 225 pushToken(FormatTok); 226 addUnwrappedLine(); 227 228 // In a first run, format everything with the lines containing macro calls 229 // replaced by the expansion. 230 if (!ExpandedLines.empty()) { 231 LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); 232 for (const auto &Line : Lines) { 233 if (!Line.Tokens.empty()) { 234 auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); 235 if (it != ExpandedLines.end()) { 236 for (const auto &Expanded : it->second) { 237 LLVM_DEBUG(printDebugInfo(Expanded)); 238 Callback.consumeUnwrappedLine(Expanded); 239 } 240 continue; 241 } 242 } 243 LLVM_DEBUG(printDebugInfo(Line)); 244 Callback.consumeUnwrappedLine(Line); 245 } 246 Callback.finishRun(); 247 } 248 249 LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); 250 for (const UnwrappedLine &Line : Lines) { 251 LLVM_DEBUG(printDebugInfo(Line)); 252 Callback.consumeUnwrappedLine(Line); 253 } 254 Callback.finishRun(); 255 Lines.clear(); 256 while (!PPLevelBranchIndex.empty() && 257 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 258 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 259 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 260 } 261 if (!PPLevelBranchIndex.empty()) { 262 ++PPLevelBranchIndex.back(); 263 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 264 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 265 } 266 } while (!PPLevelBranchIndex.empty()); 267 } 268 269 void UnwrappedLineParser::parseFile() { 270 // The top-level context in a file always has declarations, except for pre- 271 // processor directives and JavaScript files. 272 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript(); 273 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 274 MustBeDeclaration); 275 if (Style.Language == FormatStyle::LK_TextProto) 276 parseBracedList(); 277 else 278 parseLevel(); 279 // Make sure to format the remaining tokens. 
280 // 281 // LK_TextProto is special since its top-level is parsed as the body of a 282 // braced list, which does not necessarily have natural line separators such 283 // as a semicolon. Comments after the last entry that have been determined to 284 // not belong to that line, as in: 285 // key: value 286 // // endfile comment 287 // do not have a chance to be put on a line of their own until this point. 288 // Here we add this newline before end-of-file comments. 289 if (Style.Language == FormatStyle::LK_TextProto && 290 !CommentsBeforeNextToken.empty()) { 291 addUnwrappedLine(); 292 } 293 flushComments(true); 294 addUnwrappedLine(); 295 } 296 297 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() { 298 do { 299 switch (FormatTok->Tok.getKind()) { 300 case tok::l_brace: 301 return; 302 default: 303 if (FormatTok->is(Keywords.kw_where)) { 304 addUnwrappedLine(); 305 nextToken(); 306 parseCSharpGenericTypeConstraint(); 307 break; 308 } 309 nextToken(); 310 break; 311 } 312 } while (!eof()); 313 } 314 315 void UnwrappedLineParser::parseCSharpAttribute() { 316 int UnpairedSquareBrackets = 1; 317 do { 318 switch (FormatTok->Tok.getKind()) { 319 case tok::r_square: 320 nextToken(); 321 --UnpairedSquareBrackets; 322 if (UnpairedSquareBrackets == 0) { 323 addUnwrappedLine(); 324 return; 325 } 326 break; 327 case tok::l_square: 328 ++UnpairedSquareBrackets; 329 nextToken(); 330 break; 331 default: 332 nextToken(); 333 break; 334 } 335 } while (!eof()); 336 } 337 338 bool UnwrappedLineParser::precededByCommentOrPPDirective() const { 339 if (!Lines.empty() && Lines.back().InPPDirective) 340 return true; 341 342 const FormatToken *Previous = Tokens->getPreviousToken(); 343 return Previous && Previous->is(tok::comment) && 344 (Previous->IsMultiline || Previous->NewlinesBefore > 0); 345 } 346 347 /// \brief Parses a level, that is ???. 348 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level. 
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Evaluated once on entry; when RemoveBracesLLVM is off this is always true,
  // but in that case the r_brace handling below returns before reading it.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Skip an attribute and its optional parenthesized argument list.
    if (FormatTok->isAttribute()) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind Kind = FormatTok->Tok.getKind();
    if (FormatTok->is(TT_MacroBlockBegin))
      Kind = tok::l_brace;
    else if (FormatTok->is(TT_MacroBlockEnd))
      Kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. Once a
    // do-while or a label has been seen, the matching out-parameter is no
    // longer passed on.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (Kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_CompoundRequirementLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
        if (tryToParseBracedList())
          continue;
        FormatTok->setFinalizedType(TT_BlockLBrace);
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // The closing brace is left for the caller to consume. The rest of
        // this branch only decides whether the block counts as "simple".
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A trailing comment on the same line as the closing brace also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // Unmatched closing brace at the top level: consume it and go on.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek at the next non-comment token, then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      auto *Next = Tokens->getNextNonComment();
      FormatTok = Tokens->setPosition(StoredPosition);
      if (!Next->isOneOf(tok::colon, tok::arrow)) {
        // default not followed by `:` or `->` is not a case label; treat it
        // like an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // Only the first label in this level may raise the indentation level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}

/// Looks ahead from the current '{' and assigns a block kind (block or braced
/// init list) to it and to the braces nested inside it, based mostly on the
/// token that follows each closing brace. The token position is restored
/// before returning.
/// \param ExpectClassBody If true, a ';' after the closing brace of the
/// outermost brace pair does not by itself mark it as a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));

  do {
    auto *NextTok = Tokens->getNextNonComment();

    if (!Line->InMacroBody && !Style.isTableGen()) {
      // Skip PPDirective lines (except macro definitions) and comments.
      while (NextTok->is(tok::hash)) {
        NextTok = Tokens->getNextToken();
        if (NextTok->isOneOf(tok::pp_not_keyword, tok::pp_define))
          break;
        do {
          NextTok = Tokens->getNextToken();
        } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));

        while (NextTok->is(tok::comment))
          NextTok = Tokens->getNextToken();
      }
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide for braces whose kind has not been settled yet.
      if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else if (LBrace->isNot(TT_EnumLBrace)) {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBrace->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList =
              ProbablyBracedList ||
              (IsCpp && (PrevTok->Tok.isLiteral() ||
                         NextTok->isOneOf(tok::l_paren, tok::arrow)));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          // NOTE: unlike the checks above, this one overwrites the verdict
          // instead of OR-ing into it.
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }

          // Cpp macro definition body that is a nonempty braced list or block:
          if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
              !FormatTok->Previous && NextTok->is(tok::eof) &&
              // A statement can end with only `;` (simple statement), a block
              // closing brace (compound statement), or `:` (label statement).
              // If PrevTok is a block opening brace, Tok ends an empty block.
              !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
            ProbablyBracedList = true;
          }
        }
        const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
        Tok->setBlockKind(BlockKind);
        LBrace->setBlockKind(BlockKind);
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    // Any of the tokens below inside a still-undecided brace pair marks the
    // innermost open brace as a block.
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }

    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  FormatTok = Tokens->setPosition(StoredPosition);
}

// Sets the token type of the directly previous right brace.
// Does nothing if the previous non-comment token is missing or is not a '}'.
void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
  if (auto Prev = FormatTok->getPreviousNonComment();
      Prev && Prev->is(tok::r_brace)) {
    Prev->setFinalizedType(Type);
  }
}

// Mixes std::hash<T>(v) into \p seed in place, so that a sequence of calls
// yields an order-dependent combined hash.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Hashes the current preprocessor conditional stack (kind and line of every
// entry). parseBlock() compares the value at block start and end to see
// whether both braces were seen under the same stack.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
686 bool UnwrappedLineParser::mightFitOnOneLine( 687 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const { 688 const auto ColumnLimit = Style.ColumnLimit; 689 if (ColumnLimit == 0) 690 return true; 691 692 auto &Tokens = ParsedLine.Tokens; 693 assert(!Tokens.empty()); 694 695 const auto *LastToken = Tokens.back().Tok; 696 assert(LastToken); 697 698 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size()); 699 700 int Index = 0; 701 for (const auto &Token : Tokens) { 702 assert(Token.Tok); 703 auto &SavedToken = SavedTokens[Index++]; 704 SavedToken.Tok = new FormatToken; 705 SavedToken.Tok->copyFrom(*Token.Tok); 706 SavedToken.Children = std::move(Token.Children); 707 } 708 709 AnnotatedLine Line(ParsedLine); 710 assert(Line.Last == LastToken); 711 712 TokenAnnotator Annotator(Style, Keywords); 713 Annotator.annotate(Line); 714 Annotator.calculateFormattingInformation(Line); 715 716 auto Length = LastToken->TotalLength; 717 if (OpeningBrace) { 718 assert(OpeningBrace != Tokens.front().Tok); 719 if (auto Prev = OpeningBrace->Previous; 720 Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) { 721 Length -= ColumnLimit; 722 } 723 Length -= OpeningBrace->TokenText.size() + 1; 724 } 725 726 if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) { 727 assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace)); 728 Length -= FirstToken->TokenText.size() + 1; 729 } 730 731 Index = 0; 732 for (auto &Token : Tokens) { 733 const auto &SavedToken = SavedTokens[Index++]; 734 Token.Tok->copyFrom(*SavedToken.Tok); 735 Token.Children = std::move(SavedToken.Children); 736 delete SavedToken.Tok; 737 } 738 739 // If these change PPLevel needs to be used for get correct indentation. 
740 assert(!Line.InMacroBody); 741 assert(!Line.InPPDirective); 742 return Line.Level * Style.IndentWidth + Length <= ColumnLimit; 743 } 744 745 FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 746 unsigned AddLevels, bool MunchSemi, 747 bool KeepBraces, 748 IfStmtKind *IfKind, 749 bool UnindentWhitesmithsBraces) { 750 auto HandleVerilogBlockLabel = [this]() { 751 // ":" name 752 if (Style.isVerilog() && FormatTok->is(tok::colon)) { 753 nextToken(); 754 if (Keywords.isVerilogIdentifier(*FormatTok)) 755 nextToken(); 756 } 757 }; 758 759 // Whether this is a Verilog-specific block that has a special header like a 760 // module. 761 const bool VerilogHierarchy = 762 Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok); 763 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) || 764 (Style.isVerilog() && 765 (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) && 766 "'{' or macro block token expected"); 767 FormatToken *Tok = FormatTok; 768 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment); 769 auto Index = CurrentLines->size(); 770 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); 771 FormatTok->setBlockKind(BK_Block); 772 773 // For Whitesmiths mode, jump to the next level prior to skipping over the 774 // braces. 775 if (!VerilogHierarchy && AddLevels > 0 && 776 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 777 ++Line->Level; 778 } 779 780 size_t PPStartHash = computePPHash(); 781 782 const unsigned InitialLevel = Line->Level; 783 if (VerilogHierarchy) { 784 AddLevels += parseVerilogHierarchyHeader(); 785 } else { 786 nextToken(/*LevelDifference=*/AddLevels); 787 HandleVerilogBlockLabel(); 788 } 789 790 // Bail out if there are too many levels. Otherwise, the stack might overflow. 791 if (Line->Level > 300) 792 return nullptr; 793 794 if (MacroBlock && FormatTok->is(tok::l_paren)) 795 parseParens(); 796 797 size_t NbPreprocessorDirectives = 798 !parsingPPDirective() ? 
PreprocessorDirectives.size() : 0; 799 addUnwrappedLine(); 800 size_t OpeningLineIndex = 801 CurrentLines->empty() 802 ? (UnwrappedLine::kInvalidIndex) 803 : (CurrentLines->size() - 1 - NbPreprocessorDirectives); 804 805 // Whitesmiths is weird here. The brace needs to be indented for the namespace 806 // block, but the block itself may not be indented depending on the style 807 // settings. This allows the format to back up one level in those cases. 808 if (UnindentWhitesmithsBraces) 809 --Line->Level; 810 811 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 812 MustBeDeclaration); 813 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths) 814 Line->Level += AddLevels; 815 816 FormatToken *IfLBrace = nullptr; 817 const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace); 818 819 if (eof()) 820 return IfLBrace; 821 822 if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd) 823 : FormatTok->isNot(tok::r_brace)) { 824 Line->Level = InitialLevel; 825 FormatTok->setBlockKind(BK_Block); 826 return IfLBrace; 827 } 828 829 if (FormatTok->is(tok::r_brace)) { 830 FormatTok->setBlockKind(BK_Block); 831 if (Tok->is(TT_NamespaceLBrace)) 832 FormatTok->setFinalizedType(TT_NamespaceRBrace); 833 } 834 835 const bool IsFunctionRBrace = 836 FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace); 837 838 auto RemoveBraces = [=]() mutable { 839 if (!SimpleBlock) 840 return false; 841 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)); 842 assert(FormatTok->is(tok::r_brace)); 843 const bool WrappedOpeningBrace = !Tok->Previous; 844 if (WrappedOpeningBrace && FollowedByComment) 845 return false; 846 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional; 847 if (KeepBraces && !HasRequiredIfBraces) 848 return false; 849 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) { 850 const FormatToken *Previous = Tokens->getPreviousToken(); 851 assert(Previous); 852 if (Previous->is(tok::r_brace) && !Previous->Optional) 853 
return false; 854 } 855 assert(!CurrentLines->empty()); 856 auto &LastLine = CurrentLines->back(); 857 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine)) 858 return false; 859 if (Tok->is(TT_ElseLBrace)) 860 return true; 861 if (WrappedOpeningBrace) { 862 assert(Index > 0); 863 --Index; // The line above the wrapped l_brace. 864 Tok = nullptr; 865 } 866 return mightFitOnOneLine((*CurrentLines)[Index], Tok); 867 }; 868 if (RemoveBraces()) { 869 Tok->MatchingParen = FormatTok; 870 FormatTok->MatchingParen = Tok; 871 } 872 873 size_t PPEndHash = computePPHash(); 874 875 // Munch the closing brace. 876 nextToken(/*LevelDifference=*/-AddLevels); 877 878 // When this is a function block and there is an unnecessary semicolon 879 // afterwards then mark it as optional (so the RemoveSemi pass can get rid of 880 // it later). 881 if (Style.RemoveSemicolon && IsFunctionRBrace) { 882 while (FormatTok->is(tok::semi)) { 883 FormatTok->Optional = true; 884 nextToken(); 885 } 886 } 887 888 HandleVerilogBlockLabel(); 889 890 if (MacroBlock && FormatTok->is(tok::l_paren)) 891 parseParens(); 892 893 Line->Level = InitialLevel; 894 895 if (FormatTok->is(tok::kw_noexcept)) { 896 // A noexcept in a requires expression. 897 nextToken(); 898 } 899 900 if (FormatTok->is(tok::arrow)) { 901 // Following the } or noexcept we can find a trailing return type arrow 902 // as part of an implicit conversion constraint. 
903 nextToken(); 904 parseStructuralElement(); 905 } 906 907 if (MunchSemi && FormatTok->is(tok::semi)) 908 nextToken(); 909 910 if (PPStartHash == PPEndHash) { 911 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; 912 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { 913 // Update the opening line to add the forward reference as well 914 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = 915 CurrentLines->size() - 1; 916 } 917 } 918 919 return IfLBrace; 920 } 921 922 static bool isGoogScope(const UnwrappedLine &Line) { 923 // FIXME: Closure-library specific stuff should not be hard-coded but be 924 // configurable. 925 if (Line.Tokens.size() < 4) 926 return false; 927 auto I = Line.Tokens.begin(); 928 if (I->Tok->TokenText != "goog") 929 return false; 930 ++I; 931 if (I->Tok->isNot(tok::period)) 932 return false; 933 ++I; 934 if (I->Tok->TokenText != "scope") 935 return false; 936 ++I; 937 return I->Tok->is(tok::l_paren); 938 } 939 940 static bool isIIFE(const UnwrappedLine &Line, 941 const AdditionalKeywords &Keywords) { 942 // Look for the start of an immediately invoked anonymous function. 943 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression 944 // This is commonly done in JavaScript to create a new, anonymous scope. 945 // Example: (function() { ... 
})() 946 if (Line.Tokens.size() < 3) 947 return false; 948 auto I = Line.Tokens.begin(); 949 if (I->Tok->isNot(tok::l_paren)) 950 return false; 951 ++I; 952 if (I->Tok->isNot(Keywords.kw_function)) 953 return false; 954 ++I; 955 return I->Tok->is(tok::l_paren); 956 } 957 958 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 959 const FormatToken &InitialToken) { 960 tok::TokenKind Kind = InitialToken.Tok.getKind(); 961 if (InitialToken.is(TT_NamespaceMacro)) 962 Kind = tok::kw_namespace; 963 964 switch (Kind) { 965 case tok::kw_namespace: 966 return Style.BraceWrapping.AfterNamespace; 967 case tok::kw_class: 968 return Style.BraceWrapping.AfterClass; 969 case tok::kw_union: 970 return Style.BraceWrapping.AfterUnion; 971 case tok::kw_struct: 972 return Style.BraceWrapping.AfterStruct; 973 case tok::kw_enum: 974 return Style.BraceWrapping.AfterEnum; 975 default: 976 return false; 977 } 978 } 979 980 void UnwrappedLineParser::parseChildBlock() { 981 assert(FormatTok->is(tok::l_brace)); 982 FormatTok->setBlockKind(BK_Block); 983 const FormatToken *OpeningBrace = FormatTok; 984 nextToken(); 985 { 986 bool SkipIndent = (Style.isJavaScript() && 987 (isGoogScope(*Line) || isIIFE(*Line, Keywords))); 988 ScopedLineState LineState(*this); 989 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 990 /*MustBeDeclaration=*/false); 991 Line->Level += SkipIndent ? 0 : 1; 992 parseLevel(OpeningBrace); 993 flushComments(isOnNewLine(*FormatTok)); 994 Line->Level -= SkipIndent ? 
0 : 1; 995 } 996 nextToken(); 997 } 998 999 void UnwrappedLineParser::parsePPDirective() { 1000 assert(FormatTok->is(tok::hash) && "'#' expected"); 1001 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 1002 1003 nextToken(); 1004 1005 if (!FormatTok->Tok.getIdentifierInfo()) { 1006 parsePPUnknown(); 1007 return; 1008 } 1009 1010 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 1011 case tok::pp_define: 1012 parsePPDefine(); 1013 return; 1014 case tok::pp_if: 1015 parsePPIf(/*IfDef=*/false); 1016 break; 1017 case tok::pp_ifdef: 1018 case tok::pp_ifndef: 1019 parsePPIf(/*IfDef=*/true); 1020 break; 1021 case tok::pp_else: 1022 case tok::pp_elifdef: 1023 case tok::pp_elifndef: 1024 case tok::pp_elif: 1025 parsePPElse(); 1026 break; 1027 case tok::pp_endif: 1028 parsePPEndIf(); 1029 break; 1030 case tok::pp_pragma: 1031 parsePPPragma(); 1032 break; 1033 case tok::pp_error: 1034 case tok::pp_warning: 1035 nextToken(); 1036 if (!eof() && Style.isCpp()) 1037 FormatTok->setFinalizedType(TT_AfterPPDirective); 1038 [[fallthrough]]; 1039 default: 1040 parsePPUnknown(); 1041 break; 1042 } 1043 } 1044 1045 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 1046 size_t Line = CurrentLines->size(); 1047 if (CurrentLines == &PreprocessorDirectives) 1048 Line += Lines.size(); 1049 1050 if (Unreachable || 1051 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) { 1052 PPStack.push_back({PP_Unreachable, Line}); 1053 } else { 1054 PPStack.push_back({PP_Conditional, Line}); 1055 } 1056 } 1057 1058 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 1059 ++PPBranchLevel; 1060 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 1061 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 1062 PPLevelBranchIndex.push_back(0); 1063 PPLevelBranchCount.push_back(0); 1064 } 1065 PPChainBranchIndex.push(Unreachable ? 
-1 : 0); 1066 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 1067 conditionalCompilationCondition(Unreachable || Skip); 1068 } 1069 1070 void UnwrappedLineParser::conditionalCompilationAlternative() { 1071 if (!PPStack.empty()) 1072 PPStack.pop_back(); 1073 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1074 if (!PPChainBranchIndex.empty()) 1075 ++PPChainBranchIndex.top(); 1076 conditionalCompilationCondition( 1077 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 1078 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 1079 } 1080 1081 void UnwrappedLineParser::conditionalCompilationEnd() { 1082 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 1083 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 1084 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) 1085 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 1086 } 1087 // Guard against #endif's without #if. 1088 if (PPBranchLevel > -1) 1089 --PPBranchLevel; 1090 if (!PPChainBranchIndex.empty()) 1091 PPChainBranchIndex.pop(); 1092 if (!PPStack.empty()) 1093 PPStack.pop_back(); 1094 } 1095 1096 void UnwrappedLineParser::parsePPIf(bool IfDef) { 1097 bool IfNDef = FormatTok->is(tok::pp_ifndef); 1098 nextToken(); 1099 bool Unreachable = false; 1100 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) 1101 Unreachable = true; 1102 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") 1103 Unreachable = true; 1104 conditionalCompilationStart(Unreachable); 1105 FormatToken *IfCondition = FormatTok; 1106 // If there's a #ifndef on the first line, and the only lines before it are 1107 // comments, it could be an include guard. 
1108 bool MaybeIncludeGuard = IfNDef; 1109 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1110 for (auto &Line : Lines) { 1111 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1112 MaybeIncludeGuard = false; 1113 IncludeGuard = IG_Rejected; 1114 break; 1115 } 1116 } 1117 } 1118 --PPBranchLevel; 1119 parsePPUnknown(); 1120 ++PPBranchLevel; 1121 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1122 IncludeGuard = IG_IfNdefed; 1123 IncludeGuardToken = IfCondition; 1124 } 1125 } 1126 1127 void UnwrappedLineParser::parsePPElse() { 1128 // If a potential include guard has an #else, it's not an include guard. 1129 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1130 IncludeGuard = IG_Rejected; 1131 // Don't crash when there is an #else without an #if. 1132 assert(PPBranchLevel >= -1); 1133 if (PPBranchLevel == -1) 1134 conditionalCompilationStart(/*Unreachable=*/true); 1135 conditionalCompilationAlternative(); 1136 --PPBranchLevel; 1137 parsePPUnknown(); 1138 ++PPBranchLevel; 1139 } 1140 1141 void UnwrappedLineParser::parsePPEndIf() { 1142 conditionalCompilationEnd(); 1143 parsePPUnknown(); 1144 // If the #endif of a potential include guard is the last thing in the file, 1145 // then we found an include guard. 
1146 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1147 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1148 IncludeGuard = IG_Found; 1149 } 1150 } 1151 1152 void UnwrappedLineParser::parsePPDefine() { 1153 nextToken(); 1154 1155 if (!FormatTok->Tok.getIdentifierInfo()) { 1156 IncludeGuard = IG_Rejected; 1157 IncludeGuardToken = nullptr; 1158 parsePPUnknown(); 1159 return; 1160 } 1161 1162 if (IncludeGuard == IG_IfNdefed && 1163 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1164 IncludeGuard = IG_Defined; 1165 IncludeGuardToken = nullptr; 1166 for (auto &Line : Lines) { 1167 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1168 IncludeGuard = IG_Rejected; 1169 break; 1170 } 1171 } 1172 } 1173 1174 // In the context of a define, even keywords should be treated as normal 1175 // identifiers. Setting the kind to identifier is not enough, because we need 1176 // to treat additional keywords like __except as well, which are already 1177 // identifiers. Setting the identifier info to null interferes with include 1178 // guard processing above, and changes preprocessing nesting. 1179 FormatTok->Tok.setKind(tok::identifier); 1180 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1181 nextToken(); 1182 if (FormatTok->Tok.getKind() == tok::l_paren && 1183 !FormatTok->hasWhitespaceBefore()) { 1184 parseParens(); 1185 } 1186 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1187 Line->Level += PPBranchLevel + 1; 1188 addUnwrappedLine(); 1189 ++Line->Level; 1190 1191 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 
0 : 1); 1192 assert((int)Line->PPLevel >= 0); 1193 Line->InMacroBody = true; 1194 1195 if (Style.SkipMacroDefinitionBody) { 1196 while (!eof()) { 1197 FormatTok->Finalized = true; 1198 FormatTok = Tokens->getNextToken(); 1199 } 1200 addUnwrappedLine(); 1201 return; 1202 } 1203 1204 // Errors during a preprocessor directive can only affect the layout of the 1205 // preprocessor directive, and thus we ignore them. An alternative approach 1206 // would be to use the same approach we use on the file level (no 1207 // re-indentation if there was a structural error) within the macro 1208 // definition. 1209 parseFile(); 1210 } 1211 1212 void UnwrappedLineParser::parsePPPragma() { 1213 Line->InPragmaDirective = true; 1214 parsePPUnknown(); 1215 } 1216 1217 void UnwrappedLineParser::parsePPUnknown() { 1218 while (!eof()) 1219 nextToken(); 1220 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1221 Line->Level += PPBranchLevel + 1; 1222 addUnwrappedLine(); 1223 } 1224 1225 // Here we exclude certain tokens that are not usually the first token in an 1226 // unwrapped line. This is used in attempt to distinguish macro calls without 1227 // trailing semicolons from other constructs split to several lines. 1228 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1229 // Semicolon can be a null-statement, l_square can be a start of a macro or 1230 // a C++11 attribute, but this doesn't seem to be common. 1231 return !Tok.isOneOf(tok::semi, tok::l_brace, 1232 // Tokens that can only be used as binary operators and a 1233 // part of overloaded operator names. 
1234 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1235 tok::less, tok::greater, tok::slash, tok::percent, 1236 tok::lessless, tok::greatergreater, tok::equal, 1237 tok::plusequal, tok::minusequal, tok::starequal, 1238 tok::slashequal, tok::percentequal, tok::ampequal, 1239 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1240 tok::lesslessequal, 1241 // Colon is used in labels, base class lists, initializer 1242 // lists, range-based for loops, ternary operator, but 1243 // should never be the first token in an unwrapped line. 1244 tok::colon, 1245 // 'noexcept' is a trailing annotation. 1246 tok::kw_noexcept); 1247 } 1248 1249 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1250 const FormatToken *FormatTok) { 1251 // FIXME: This returns true for C/C++ keywords like 'struct'. 1252 return FormatTok->is(tok::identifier) && 1253 (!FormatTok->Tok.getIdentifierInfo() || 1254 !FormatTok->isOneOf( 1255 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1256 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1257 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1258 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1259 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1260 Keywords.kw_instanceof, Keywords.kw_interface, 1261 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1262 } 1263 1264 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1265 const FormatToken *FormatTok) { 1266 return FormatTok->Tok.isLiteral() || 1267 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1268 mustBeJSIdent(Keywords, FormatTok); 1269 } 1270 1271 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1272 // when encountered after a value (see mustBeJSIdentOrValue). 
static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
                           const FormatToken *FormatTok) {
  return FormatTok->isOneOf(
      tok::kw_return, Keywords.kw_yield,
      // conditionals
      tok::kw_if, tok::kw_else,
      // loops
      tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
      // switch/case
      tok::kw_switch, tok::kw_case,
      // exceptions
      tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
      // declaration
      tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
      Keywords.kw_async, Keywords.kw_function,
      // import/export
      Keywords.kw_import, tok::kw_export);
}

// Checks whether a token is a type in K&R C (aka C78).
// Note that any identifier is accepted as well, not only the builtin type
// keywords.
static bool isC78Type(const FormatToken &Tok) {
  return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
                     tok::kw_unsigned, tok::kw_float, tok::kw_double,
                     tok::identifier);
}

// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
//   int f(a, b)
//   short a, b;
//   {
//     return a + b;
//   }
//
// \p Tok is the candidate first token of the parameter declaration, \p Next
// the token following it, and \p FuncName the token expected to be the
// function name. All three must be non-null.
static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
                               const FormatToken *FuncName) {
  assert(Tok);
  assert(Next);
  assert(FuncName);

  if (FuncName->isNot(tok::identifier))
    return false;

  // The function name must be preceded by a '*' or by something that can be a
  // C78 type (the return type).
  const FormatToken *Prev = FuncName->Previous;
  if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
    return false;

  // The candidate must start like a declaration: a C78 type or one of
  // 'register', 'struct', 'union'.
  if (!isC78Type(*Tok) &&
      !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
    return false;
  }

  // It must be followed by a '*' or by a token that has identifier info.
  if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
    return false;

  // Walk backwards from the candidate: it must directly follow ')', which in
  // turn must follow an identifier (the last parameter name) ...
  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::r_paren))
    return false;

  Tok = Tok->Previous;
  if (!Tok || Tok->isNot(tok::identifier))
    return false;

  // ... and that identifier must follow '(' or ','.
  return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
}

// Tries to parse an `import ...;` line. Expects the current token to be
// 'import'. Returns false without consuming anything if the next non-comment
// token has no identifier info and is none of ':', '<' or a string literal.
// Otherwise consumes tokens up to and including the terminating ';' (or until
// eof()), emits the line and returns true.
bool UnwrappedLineParser::parseModuleImport() {
  assert(FormatTok->is(Keywords.kw_import) && "'import' expected");

  if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true);
      !Token->Tok.getIdentifierInfo() &&
      !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) {
    return false;
  }

  nextToken();
  while (!eof()) {
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_ModulePartitionColon);
    }
    // Handle import <foo/bar.h> as we would an include statement.
    else if (FormatTok->is(tok::less)) {
      nextToken();
      while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
        // Mark tokens up to the trailing line comments as implicit string
        // literals.
        if (FormatTok->isNot(tok::comment) &&
            !FormatTok->TokenText.starts_with("//")) {
          FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
        }
        nextToken();
      }
    }
    if (FormatTok->is(tok::semi)) {
      nextToken();
      break;
    }
    nextToken();
  }

  addUnwrappedLine();
  return true;
}

// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// return true in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // Nothing to do unless a line break separates the two tokens. If comments
  // are pending before the next token, the first comment decides.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.starts_with("}");
  // Two values in a row (or a value after ']', ')', '++', '--'), unless the
  // boundary is inside a template string expression: end the line.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus))) {
    return addUnwrappedLine();
  }
  // A value or ')' followed by a keyword that starts a declaration or
  // statement: end the line.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next)) {
    return addUnwrappedLine();
  }
}

// Parses one structural element (statement, declaration, label, block, ...)
// starting at the current token.
//
// The function works in two phases:
//  1. A dispatch on the first token. Constructs that are recognizable from
//     their first token are handed to dedicated parse functions, which
//     usually end the element (`return`). Cases that `break` continue with
//     phase 2.
//  2. A generic loop that consumes tokens until it reaches something that
//     ends the element (';', '}', a block, ...) or until eof().
//
// \param OpeningBrace the brace that opened the enclosing block, if any; used
//        here to detect requires-expression bodies and to restrict the K&R
//        parameter-declaration check to the top level.
// \param IfKind forwarded to parseIfThenElse().
// \param IfLeftBrace if non-null, receives the result of parseIfThenElse()
//        when the element is an `if` statement.
// \param HasDoWhile if non-null, set to true when a do-while was parsed.
// \param HasLabel if non-null, set to true when a goto label was parsed.
void UnwrappedLineParser::parseStructuralElement(
    const FormatToken *OpeningBrace, IfStmtKind *IfKind,
    FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
  // TableGen `include "file"` line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }

  if (IsCpp) {
    // Consume leading attribute specifiers for as long as
    // handleCppAttributes() accepts them.
    while (FormatTok->is(tok::l_square) && handleCppAttributes()) {
    }
  } else if (Style.isVerilog()) {
    if (Keywords.isVerilogStructuredProcedure(*FormatTok)) {
      parseForOrWhileLoop(/*HasParens=*/false);
      return;
    }
    if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                           Keywords.kw_assume, Keywords.kw_cover)) {
      parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true);
      return;
    }

    // Skip things that can exist before keywords like 'if' and 'case'.
    while (true) {
      if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique,
                             Keywords.kw_unique0)) {
        nextToken();
      } else if (FormatTok->is(tok::l_paren) &&
                 Tokens->peekNextToken()->is(tok::star)) {
        parseParens();
      } else {
        break;
      }
    }
  }

  // Tokens that only make sense at the beginning of a line.
  if (FormatTok->isAccessSpecifierKeyword()) {
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp()) {
      nextToken();
    } else {
      parseAccessSpecifier();
    }
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_InlineASMBrace);
      nextToken();
      // Everything inside `asm { ... }` is finalized, i.e. left untouched.
      while (FormatTok && !eof()) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setFinalizedType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_if: {
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    FormatToken *Tok = parseIfThenElse(IfKind);
    if (IfLeftBrace)
      *IfLeftBrace = Tok;
    return;
  }
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseDoWhile();
    if (HasDoWhile)
      *HasDoWhile = true;
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'switch: string' field declaration.
      break;
    }
    parseSwitch(/*IsExpr=*/false);
    return;
  case tok::kw_default: {
    // In Verilog default along with other labels are handled in the next loop.
    if (Style.isVerilog())
      break;
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'default: string' field declaration.
      break;
    }
    auto *Default = FormatTok;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_CaseLabelColon);
      parseLabel();
      return;
    }
    if (FormatTok->is(tok::arrow)) {
      FormatTok->setFinalizedType(TT_CaseLabelArrow);
      Default->setFinalizedType(TT_SwitchExpressionLabel);
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  }
  case tok::kw_case:
    // Proto: there are no switch/case statements.
    if (Style.Language == FormatStyle::LK_Proto) {
      nextToken();
      return;
    }
    if (Style.isVerilog()) {
      parseBlock();
      addUnwrappedLine();
      return;
    }
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // 'case: string' field declaration.
      nextToken();
      break;
    }
    parseCaseLabel();
    return;
  case tok::kw_goto:
    nextToken();
    // Also consume a `case` that directly follows `goto`.
    if (FormatTok->is(tok::kw_case))
      nextToken();
    break;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration) {
      // field/method declaration.
      break;
    }
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (Style.isVerilog()) {
      // In Verilog an extern module declaration looks like the start of a
      // module. But there is no body and endmodule. So we handle it
      // separately.
      if (Keywords.isVerilogHierarchy(*FormatTok)) {
        parseVerilogHierarchyHeader();
        return;
      }
    } else if (FormatTok->is(tok::string_literal)) {
      // `extern "..."`, possibly followed by a braced block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (IsCpp) {
      nextToken();
      if (FormatTok->is(tok::kw_namespace)) {
        parseNamespace();
        return;
      }
      if (FormatTok->is(tok::l_brace)) {
        parseCppExportBlock();
        return;
      }
      if (FormatTok->is(Keywords.kw_import) && parseModuleImport())
        return;
    }
    break;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: `import [public] "file";`
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (FormatTok->isNot(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (IsCpp && parseModuleImport())
        return;
    }
    // `signals:` / `slots:` style section markers (and their Q_ variants):
    // the identifier followed by ':' forms a line of its own.
    if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                                    Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (IsCpp && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (IsCpp && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In Verilog labels can be any expression, so we don't do them here.
    // JS doesn't have macros, and within classes colons indicate fields, not
    // labels.
    // TableGen doesn't have labels.
    if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() &&
        Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) {
      // `identifier:` outside of a declaration context is a goto label.
      nextToken();
      if (!Line->InMacroBody || CurrentLines->size() > 1)
        Line->Tokens.begin()->Tok->MustBreakBefore = true;
      FormatTok->setFinalizedType(TT_GotoLabelColon);
      parseLabel(!Style.IndentGotoLabels);
      if (HasLabel)
        *HasLabel = true;
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }

  // Phase 2: generic token loop. InRequiresExpression is computed once, from
  // the type of the brace that opened the enclosing block.
  for (const bool InRequiresExpression =
           OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace,
                                                 TT_CompoundRequirementLBrace);
       !eof();) {
    // A C++ alternative operator keyword that is itself followed by a binary
    // operator is re-tagged as a plain identifier.
    if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) {
      if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true);
          Next && Next->isBinaryOperator()) {
        FormatTok->Tok.setKind(tok::identifier);
      }
    }
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Objective-C '@' keywords.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->is(tok::l_paren)) {
          // Skip synchronization object
          parseParens();
        }
        if (FormatTok->is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_requires: {
      if (IsCpp) {
        bool ParsedClause = parseRequires();
        if (ParsedClause)
          return;
      } else {
        nextToken();
      }
      break;
    }
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ..." or "... -> enum" or
      // "template <..., enum ...>".
      if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies to C++ and Verilog.
      if (!IsCpp && !Style.isVerilog()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // `typedef NS_ENUM(...)` and friends are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM)) {
        parseEnum();
      }
      break;
    case tok::kw_class:
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isTableGen()) {
        // Do nothing special. In this case the l_brace becomes FunctionLBrace.
        // This is same as def and so on.
        nextToken();
        break;
      }
      [[fallthrough]];
    case tok::kw_struct:
    case tok::kw_union:
      if (parseStructLike())
        return;
      break;
    case tok::kw_decltype:
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        assert(FormatTok->Previous);
        // Remember `decltype(auto)` on the line; it is read again when a
        // function body brace is reached below.
        if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto,
                                              tok::l_paren)) {
          Line->SeenDecltypeAuto = true;
        }
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class)) {
        nextToken();
      }
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo()) {
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      }
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (OpeningBrace || !IsCpp || !Previous || eof())
        break;
      if (isC78ParameterDecl(FormatTok,
                             Tokens->peekNextToken(/*SkipComment=*/true),
                             Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      // Block return type.
      if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) {
        nextToken();
        // Return types: pointers are ok too.
        while (FormatTok->is(tok::star))
          nextToken();
      }
      // Block argument list.
      if (FormatTok->is(tok::l_paren))
        parseParens();
      // Block body.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (InRequiresExpression)
        FormatTok->setFinalizedType(TT_BracedListLBrace);
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        IsDecltypeAutoFunction = Line->SeenDecltypeAuto;
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always) {
            addUnwrappedLine();
          }
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        if (!Previous || Previous->isNot(TT_TypeDeclarationParen))
          FormatTok->setFinalizedType(TT_FunctionLBrace);
        parseBlock();
        IsDecltypeAutoFunction = false;
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 ||
           (TokenCount == 1 &&
            Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token and then restore the stream position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isVerilog()) {
        if (FormatTok->is(Keywords.kw_table)) {
          parseVerilogTable();
          return;
        }
        if (Keywords.isVerilogBegin(*FormatTok) ||
            Keywords.isVerilogHierarchy(*FormatTok)) {
          parseBlock();
          addUnwrappedLine();
          return;
        }
      }

      if (!IsCpp && FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (IsCpp && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // True if the line holds exactly one token once leading comments (and,
      // for Verilog, leading '#' tokens) are skipped.
      auto OneTokenSoFar = [&]() {
        auto I = Line->Tokens.begin(), E = Line->Tokens.end();
        while (I != E && I->Tok->is(tok::comment))
          ++I;
        if (Style.isVerilog())
          while (I != E && I->Tok->is(tok::hash))
            ++I;
        return I != E && (++I == E);
      };
      if (OneTokenSoFar()) {
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is followed by a line
        // break, is either at least five characters long or function-like
        // (and not followed by another '('), and the next token could start
        // a line.
        if (FollowedByNewline &&
            (Text.size() >= 5 ||
             (FunctionLike && FormatTok->isNot(tok::l_paren))) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          if (PreviousToken->isNot(TT_UntouchableMacroFunc))
            PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        // TableGen's defset statement has syntax of the form,
        // `defset <type> <name> = { <statement>... }`
        if (Style.isTableGen() &&
            Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) {
          FormatTok->setFinalizedType(TT_FunctionLBrace);
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/false);
          addUnwrappedLine();
          break;
        }
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    case tok::kw_switch:
      // A `switch` in the middle of an element is parsed as a switch
      // expression in Java only.
      if (Style.Language == FormatStyle::LK_Java)
        parseSwitch(/*IsExpr=*/true);
      else
        nextToken();
      break;
    case tok::kw_case:
      // Proto: there are no switch/case statements.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        return;
      }
      // In Verilog switch is called case.
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::greater:
      nextToken();
      // A '>' directly followed by '{' is marked as a template closer.
      if (FormatTok->is(tok::l_brace))
        FormatTok->Previous->setFinalizedType(TT_TemplateCloser);
      break;
    default:
      nextToken();
      break;
    }
  }
}

bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (!FormatTok->Previous || FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
2182 bool HasSpecialAccessor = false; 2183 bool IsTrivialPropertyAccessor = true; 2184 bool HasAttribute = false; 2185 while (!eof()) { 2186 if (const bool IsAccessorKeyword = 2187 Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set); 2188 IsAccessorKeyword || Tok->isAccessSpecifierKeyword() || 2189 Tok->isOneOf(tok::l_square, tok::semi, Keywords.kw_internal)) { 2190 if (IsAccessorKeyword) 2191 HasSpecialAccessor = true; 2192 else if (Tok->is(tok::l_square)) 2193 HasAttribute = true; 2194 Tok = Tokens->getNextToken(); 2195 continue; 2196 } 2197 if (Tok->isNot(tok::r_brace)) 2198 IsTrivialPropertyAccessor = false; 2199 break; 2200 } 2201 2202 if (!HasSpecialAccessor || HasAttribute) { 2203 Tokens->setPosition(StoredPosition); 2204 return false; 2205 } 2206 2207 // Try to parse the property accessor: 2208 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties 2209 Tokens->setPosition(StoredPosition); 2210 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) 2211 addUnwrappedLine(); 2212 nextToken(); 2213 do { 2214 switch (FormatTok->Tok.getKind()) { 2215 case tok::r_brace: 2216 nextToken(); 2217 if (FormatTok->is(tok::equal)) { 2218 while (!eof() && FormatTok->isNot(tok::semi)) 2219 nextToken(); 2220 nextToken(); 2221 } 2222 addUnwrappedLine(); 2223 return true; 2224 case tok::l_brace: 2225 ++Line->Level; 2226 parseBlock(/*MustBeDeclaration=*/true); 2227 addUnwrappedLine(); 2228 --Line->Level; 2229 break; 2230 case tok::equal: 2231 if (FormatTok->is(TT_FatArrow)) { 2232 ++Line->Level; 2233 do { 2234 nextToken(); 2235 } while (!eof() && FormatTok->isNot(tok::semi)); 2236 nextToken(); 2237 addUnwrappedLine(); 2238 --Line->Level; 2239 break; 2240 } 2241 nextToken(); 2242 break; 2243 default: 2244 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init, 2245 Keywords.kw_set) && 2246 !IsTrivialPropertyAccessor) { 2247 // Non-trivial get/set needs to be on its own line. 
2248 addUnwrappedLine(); 2249 } 2250 nextToken(); 2251 } 2252 } while (!eof()); 2253 2254 // Unreachable for well-formed code (paired '{' and '}'). 2255 return true; 2256 } 2257 2258 bool UnwrappedLineParser::tryToParseLambda() { 2259 assert(FormatTok->is(tok::l_square)); 2260 if (!IsCpp) { 2261 nextToken(); 2262 return false; 2263 } 2264 FormatToken &LSquare = *FormatTok; 2265 if (!tryToParseLambdaIntroducer()) 2266 return false; 2267 2268 bool SeenArrow = false; 2269 bool InTemplateParameterList = false; 2270 2271 while (FormatTok->isNot(tok::l_brace)) { 2272 if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) { 2273 nextToken(); 2274 continue; 2275 } 2276 switch (FormatTok->Tok.getKind()) { 2277 case tok::l_brace: 2278 break; 2279 case tok::l_paren: 2280 parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference); 2281 break; 2282 case tok::l_square: 2283 parseSquare(); 2284 break; 2285 case tok::less: 2286 assert(FormatTok->Previous); 2287 if (FormatTok->Previous->is(tok::r_square)) 2288 InTemplateParameterList = true; 2289 nextToken(); 2290 break; 2291 case tok::kw_auto: 2292 case tok::kw_class: 2293 case tok::kw_struct: 2294 case tok::kw_union: 2295 case tok::kw_template: 2296 case tok::kw_typename: 2297 case tok::amp: 2298 case tok::star: 2299 case tok::kw_const: 2300 case tok::kw_constexpr: 2301 case tok::kw_consteval: 2302 case tok::comma: 2303 case tok::greater: 2304 case tok::identifier: 2305 case tok::numeric_constant: 2306 case tok::coloncolon: 2307 case tok::kw_mutable: 2308 case tok::kw_noexcept: 2309 case tok::kw_static: 2310 nextToken(); 2311 break; 2312 // Specialization of a template with an integer parameter can contain 2313 // arithmetic, logical, comparison and ternary operators. 2314 // 2315 // FIXME: This also accepts sequences of operators that are not in the scope 2316 // of a template argument list. 2317 // 2318 // In a C++ lambda a template type can only occur after an arrow. 
We use 2319 // this as an heuristic to distinguish between Objective-C expressions 2320 // followed by an `a->b` expression, such as: 2321 // ([obj func:arg] + a->b) 2322 // Otherwise the code below would parse as a lambda. 2323 case tok::plus: 2324 case tok::minus: 2325 case tok::exclaim: 2326 case tok::tilde: 2327 case tok::slash: 2328 case tok::percent: 2329 case tok::lessless: 2330 case tok::pipe: 2331 case tok::pipepipe: 2332 case tok::ampamp: 2333 case tok::caret: 2334 case tok::equalequal: 2335 case tok::exclaimequal: 2336 case tok::greaterequal: 2337 case tok::lessequal: 2338 case tok::question: 2339 case tok::colon: 2340 case tok::ellipsis: 2341 case tok::kw_true: 2342 case tok::kw_false: 2343 if (SeenArrow || InTemplateParameterList) { 2344 nextToken(); 2345 break; 2346 } 2347 return true; 2348 case tok::arrow: 2349 // This might or might not actually be a lambda arrow (this could be an 2350 // ObjC method invocation followed by a dereferencing arrow). We might 2351 // reset this back to TT_Unknown in TokenAnnotator. 
2352 FormatTok->setFinalizedType(TT_LambdaArrow); 2353 SeenArrow = true; 2354 nextToken(); 2355 break; 2356 case tok::kw_requires: { 2357 auto *RequiresToken = FormatTok; 2358 nextToken(); 2359 parseRequiresClause(RequiresToken); 2360 break; 2361 } 2362 case tok::equal: 2363 if (!InTemplateParameterList) 2364 return true; 2365 nextToken(); 2366 break; 2367 default: 2368 return true; 2369 } 2370 } 2371 2372 FormatTok->setFinalizedType(TT_LambdaLBrace); 2373 LSquare.setFinalizedType(TT_LambdaLSquare); 2374 2375 NestedLambdas.push_back(Line->SeenDecltypeAuto); 2376 parseChildBlock(); 2377 assert(!NestedLambdas.empty()); 2378 NestedLambdas.pop_back(); 2379 2380 return true; 2381 } 2382 2383 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2384 const FormatToken *Previous = FormatTok->Previous; 2385 const FormatToken *LeftSquare = FormatTok; 2386 nextToken(); 2387 if ((Previous && ((Previous->Tok.getIdentifierInfo() && 2388 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, 2389 tok::kw_co_yield, tok::kw_co_return)) || 2390 Previous->closesScope())) || 2391 LeftSquare->isCppStructuredBinding(IsCpp)) { 2392 return false; 2393 } 2394 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) 2395 return false; 2396 if (FormatTok->is(tok::r_square)) { 2397 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); 2398 if (Next->is(tok::greater)) 2399 return false; 2400 } 2401 parseSquare(/*LambdaIntroducer=*/true); 2402 return true; 2403 } 2404 2405 void UnwrappedLineParser::tryToParseJSFunction() { 2406 assert(FormatTok->is(Keywords.kw_function)); 2407 if (FormatTok->is(Keywords.kw_async)) 2408 nextToken(); 2409 // Consume "function". 2410 nextToken(); 2411 2412 // Consume * (generator function). Treat it like C++'s overloaded operators. 2413 if (FormatTok->is(tok::star)) { 2414 FormatTok->setFinalizedType(TT_OverloadedOperator); 2415 nextToken(); 2416 } 2417 2418 // Consume function name. 
2419 if (FormatTok->is(tok::identifier)) 2420 nextToken(); 2421 2422 if (FormatTok->isNot(tok::l_paren)) 2423 return; 2424 2425 // Parse formal parameter list. 2426 parseParens(); 2427 2428 if (FormatTok->is(tok::colon)) { 2429 // Parse a type definition. 2430 nextToken(); 2431 2432 // Eat the type declaration. For braced inline object types, balance braces, 2433 // otherwise just parse until finding an l_brace for the function body. 2434 if (FormatTok->is(tok::l_brace)) 2435 tryToParseBracedList(); 2436 else 2437 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2438 nextToken(); 2439 } 2440 2441 if (FormatTok->is(tok::semi)) 2442 return; 2443 2444 parseChildBlock(); 2445 } 2446 2447 bool UnwrappedLineParser::tryToParseBracedList() { 2448 if (FormatTok->is(BK_Unknown)) 2449 calculateBraceTypes(); 2450 assert(FormatTok->isNot(BK_Unknown)); 2451 if (FormatTok->is(BK_Block)) 2452 return false; 2453 nextToken(); 2454 parseBracedList(); 2455 return true; 2456 } 2457 2458 bool UnwrappedLineParser::tryToParseChildBlock() { 2459 assert(Style.isJavaScript() || Style.isCSharp()); 2460 assert(FormatTok->is(TT_FatArrow)); 2461 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2462 // They always start an expression or a child block if followed by a curly 2463 // brace. 2464 nextToken(); 2465 if (FormatTok->isNot(tok::l_brace)) 2466 return false; 2467 parseChildBlock(); 2468 return true; 2469 } 2470 2471 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { 2472 assert(!IsAngleBracket || !IsEnum); 2473 bool HasError = false; 2474 2475 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2476 // replace this by using parseAssignmentExpression() inside. 
2477 do { 2478 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2479 tryToParseChildBlock()) { 2480 continue; 2481 } 2482 if (Style.isJavaScript()) { 2483 if (FormatTok->is(Keywords.kw_function)) { 2484 tryToParseJSFunction(); 2485 continue; 2486 } 2487 if (FormatTok->is(tok::l_brace)) { 2488 // Could be a method inside of a braced list `{a() { return 1; }}`. 2489 if (tryToParseBracedList()) 2490 continue; 2491 parseChildBlock(); 2492 } 2493 } 2494 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { 2495 if (IsEnum) { 2496 FormatTok->setBlockKind(BK_Block); 2497 if (!Style.AllowShortEnumsOnASingleLine) 2498 addUnwrappedLine(); 2499 } 2500 nextToken(); 2501 return !HasError; 2502 } 2503 switch (FormatTok->Tok.getKind()) { 2504 case tok::l_square: 2505 if (Style.isCSharp()) 2506 parseSquare(); 2507 else 2508 tryToParseLambda(); 2509 break; 2510 case tok::l_paren: 2511 parseParens(); 2512 // JavaScript can just have free standing methods and getters/setters in 2513 // object literals. Detect them by a "{" following ")". 2514 if (Style.isJavaScript()) { 2515 if (FormatTok->is(tok::l_brace)) 2516 parseChildBlock(); 2517 break; 2518 } 2519 break; 2520 case tok::l_brace: 2521 // Assume there are no blocks inside a braced init list apart 2522 // from the ones we explicitly parse out (like lambdas). 2523 FormatTok->setBlockKind(BK_BracedInit); 2524 if (!IsAngleBracket) { 2525 auto *Prev = FormatTok->Previous; 2526 if (Prev && Prev->is(tok::greater)) 2527 Prev->setFinalizedType(TT_TemplateCloser); 2528 } 2529 nextToken(); 2530 parseBracedList(); 2531 break; 2532 case tok::less: 2533 nextToken(); 2534 if (IsAngleBracket) 2535 parseBracedList(/*IsAngleBracket=*/true); 2536 break; 2537 case tok::semi: 2538 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2539 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2540 // used for error recovery if we have otherwise determined that this is 2541 // a braced list. 
2542 if (Style.isJavaScript()) { 2543 nextToken(); 2544 break; 2545 } 2546 HasError = true; 2547 if (!IsEnum) 2548 return false; 2549 nextToken(); 2550 break; 2551 case tok::comma: 2552 nextToken(); 2553 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2554 addUnwrappedLine(); 2555 break; 2556 default: 2557 nextToken(); 2558 break; 2559 } 2560 } while (!eof()); 2561 return false; 2562 } 2563 2564 /// \brief Parses a pair of parentheses (and everything between them). 2565 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2566 /// double ampersands. This applies for all nested scopes as well. 2567 /// 2568 /// Returns whether there is a `=` token between the parentheses. 2569 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2570 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2571 auto *LeftParen = FormatTok; 2572 bool SeenComma = false; 2573 bool SeenEqual = false; 2574 bool MightBeFoldExpr = false; 2575 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2576 nextToken(); 2577 do { 2578 switch (FormatTok->Tok.getKind()) { 2579 case tok::l_paren: 2580 if (parseParens(AmpAmpTokenType)) 2581 SeenEqual = true; 2582 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2583 parseChildBlock(); 2584 break; 2585 case tok::r_paren: { 2586 auto *Prev = LeftParen->Previous; 2587 if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && 2588 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2589 const auto *Next = Tokens->peekNextToken(); 2590 const bool DoubleParens = 2591 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2592 const bool CommaSeparated = 2593 !DoubleParens && Prev && Prev->isOneOf(tok::l_paren, tok::comma) && 2594 Next && Next->isOneOf(tok::comma, tok::r_paren); 2595 const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; 2596 const bool Excluded = 2597 PrevPrev && 2598 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || 2599 SeenComma || 2600 (SeenEqual && 2601 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2602 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2603 const bool ReturnParens = 2604 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2605 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || 2606 (!NestedLambdas.empty() && !NestedLambdas.back())) && 2607 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2608 Next->is(tok::semi); 2609 if ((DoubleParens && !Excluded) || (CommaSeparated && !SeenComma) || 2610 ReturnParens) { 2611 LeftParen->Optional = true; 2612 FormatTok->Optional = true; 2613 } 2614 } 2615 if (Prev) { 2616 if (Prev->is(TT_TypenameMacro)) { 2617 LeftParen->setFinalizedType(TT_TypeDeclarationParen); 2618 FormatTok->setFinalizedType(TT_TypeDeclarationParen); 2619 } else if (Prev->is(tok::greater) && FormatTok->Previous == LeftParen) { 2620 Prev->setFinalizedType(TT_TemplateCloser); 2621 } 2622 } 2623 nextToken(); 2624 return SeenEqual; 2625 } 2626 case tok::r_brace: 2627 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2628 return SeenEqual; 2629 case tok::l_square: 2630 tryToParseLambda(); 2631 break; 2632 case tok::l_brace: 2633 if (!tryToParseBracedList()) 2634 parseChildBlock(); 2635 break; 2636 case tok::at: 2637 nextToken(); 2638 if (FormatTok->is(tok::l_brace)) { 2639 nextToken(); 2640 parseBracedList(); 2641 } 2642 break; 2643 case tok::comma: 2644 SeenComma = true; 2645 nextToken(); 2646 break; 2647 case tok::ellipsis: 2648 MightBeFoldExpr = true; 2649 nextToken(); 2650 break; 2651 case tok::equal: 2652 SeenEqual = true; 2653 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2654 tryToParseChildBlock(); 2655 else 2656 nextToken(); 2657 break; 2658 case tok::kw_class: 2659 if (Style.isJavaScript()) 2660 parseRecord(/*ParseAsExpr=*/true); 2661 else 2662 nextToken(); 2663 break; 2664 case tok::identifier: 2665 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function))) 2666 tryToParseJSFunction(); 2667 else 2668 nextToken(); 2669 break; 2670 case tok::kw_switch: 2671 if (Style.Language == FormatStyle::LK_Java) 2672 parseSwitch(/*IsExpr=*/true); 2673 else 2674 nextToken(); 2675 break; 2676 case tok::kw_requires: { 2677 auto RequiresToken = FormatTok; 2678 nextToken(); 2679 parseRequiresExpression(RequiresToken); 2680 break; 2681 } 2682 case tok::ampamp: 2683 if (AmpAmpTokenType != TT_Unknown) 2684 FormatTok->setFinalizedType(AmpAmpTokenType); 2685 [[fallthrough]]; 2686 default: 2687 nextToken(); 2688 break; 2689 } 2690 } while (!eof()); 2691 return SeenEqual; 2692 } 2693 2694 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2695 if (!LambdaIntroducer) { 2696 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2697 if (tryToParseLambda()) 2698 return; 2699 } 2700 do { 2701 switch (FormatTok->Tok.getKind()) { 2702 case tok::l_paren: 2703 parseParens(); 2704 break; 2705 case tok::r_square: 2706 nextToken(); 2707 return; 2708 case tok::r_brace: 2709 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2710 return; 2711 case tok::l_square: 2712 parseSquare(); 2713 break; 2714 case tok::l_brace: { 2715 if (!tryToParseBracedList()) 2716 parseChildBlock(); 2717 break; 2718 } 2719 case tok::at: 2720 case tok::colon: 2721 nextToken(); 2722 if (FormatTok->is(tok::l_brace)) { 2723 nextToken(); 2724 parseBracedList(); 2725 } 2726 break; 2727 default: 2728 nextToken(); 2729 break; 2730 } 2731 } while (!eof()); 2732 } 2733 2734 void UnwrappedLineParser::keepAncestorBraces() { 2735 if (!Style.RemoveBracesLLVM) 2736 return; 2737 2738 const int MaxNestingLevels = 2; 2739 const int Size = NestedTooDeep.size(); 2740 if (Size >= MaxNestingLevels) 2741 NestedTooDeep[Size - MaxNestingLevels] = true; 2742 NestedTooDeep.push_back(false); 2743 } 2744 2745 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2746 for (const auto &Token : llvm::reverse(Line.Tokens)) 2747 if (Token.Tok->isNot(tok::comment)) 2748 return Token.Tok; 2749 2750 return nullptr; 2751 } 2752 2753 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2754 FormatToken *Tok = nullptr; 2755 2756 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2757 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2758 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2759 ? 
getLastNonComment(*Line) 2760 : Line->Tokens.back().Tok; 2761 assert(Tok); 2762 if (Tok->BraceCount < 0) { 2763 assert(Tok->BraceCount == -1); 2764 Tok = nullptr; 2765 } else { 2766 Tok->BraceCount = -1; 2767 } 2768 } 2769 2770 addUnwrappedLine(); 2771 ++Line->Level; 2772 ++Line->UnbracedBodyLevel; 2773 parseStructuralElement(); 2774 --Line->UnbracedBodyLevel; 2775 2776 if (Tok) { 2777 assert(!Line->InPPDirective); 2778 Tok = nullptr; 2779 for (const auto &L : llvm::reverse(*CurrentLines)) { 2780 if (!L.InPPDirective && getLastNonComment(L)) { 2781 Tok = L.Tokens.back().Tok; 2782 break; 2783 } 2784 } 2785 assert(Tok); 2786 ++Tok->BraceCount; 2787 } 2788 2789 if (CheckEOF && eof()) 2790 addUnwrappedLine(); 2791 2792 --Line->Level; 2793 } 2794 2795 static void markOptionalBraces(FormatToken *LeftBrace) { 2796 if (!LeftBrace) 2797 return; 2798 2799 assert(LeftBrace->is(tok::l_brace)); 2800 2801 FormatToken *RightBrace = LeftBrace->MatchingParen; 2802 if (!RightBrace) { 2803 assert(!LeftBrace->Optional); 2804 return; 2805 } 2806 2807 assert(RightBrace->is(tok::r_brace)); 2808 assert(RightBrace->MatchingParen == LeftBrace); 2809 assert(LeftBrace->Optional == RightBrace->Optional); 2810 2811 LeftBrace->Optional = true; 2812 RightBrace->Optional = true; 2813 } 2814 2815 void UnwrappedLineParser::handleAttributes() { 2816 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2817 if (FormatTok->isAttribute()) 2818 nextToken(); 2819 else if (FormatTok->is(tok::l_square)) 2820 handleCppAttributes(); 2821 } 2822 2823 bool UnwrappedLineParser::handleCppAttributes() { 2824 // Handle [[likely]] / [[unlikely]] attributes. 2825 assert(FormatTok->is(tok::l_square)); 2826 if (!tryToParseSimpleAttribute()) 2827 return false; 2828 parseSquare(); 2829 return true; 2830 } 2831 2832 /// Returns whether \c Tok begins a block. 
2833 bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const { 2834 // FIXME: rename the function or make 2835 // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work. 2836 return Style.isVerilog() ? Keywords.isVerilogBegin(Tok) 2837 : Tok.is(tok::l_brace); 2838 } 2839 2840 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind, 2841 bool KeepBraces, 2842 bool IsVerilogAssert) { 2843 assert((FormatTok->is(tok::kw_if) || 2844 (Style.isVerilog() && 2845 FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 2846 Keywords.kw_assume, Keywords.kw_cover))) && 2847 "'if' expected"); 2848 nextToken(); 2849 2850 if (IsVerilogAssert) { 2851 // Handle `assert #0` and `assert final`. 2852 if (FormatTok->is(Keywords.kw_verilogHash)) { 2853 nextToken(); 2854 if (FormatTok->is(tok::numeric_constant)) 2855 nextToken(); 2856 } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property, 2857 Keywords.kw_sequence)) { 2858 nextToken(); 2859 } 2860 } 2861 2862 // TableGen's if statement has the form of `if <cond> then { ... }`. 2863 if (Style.isTableGen()) { 2864 while (!eof() && FormatTok->isNot(Keywords.kw_then)) { 2865 // Simply skip until then. This range only contains a value. 2866 nextToken(); 2867 } 2868 } 2869 2870 // Handle `if !consteval`. 2871 if (FormatTok->is(tok::exclaim)) 2872 nextToken(); 2873 2874 bool KeepIfBraces = true; 2875 if (FormatTok->is(tok::kw_consteval)) { 2876 nextToken(); 2877 } else { 2878 KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces; 2879 if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier)) 2880 nextToken(); 2881 if (FormatTok->is(tok::l_paren)) { 2882 FormatTok->setFinalizedType(TT_ConditionLParen); 2883 parseParens(); 2884 } 2885 } 2886 handleAttributes(); 2887 // The then action is optional in Verilog assert statements. 
2888 if (IsVerilogAssert && FormatTok->is(tok::semi)) { 2889 nextToken(); 2890 addUnwrappedLine(); 2891 return nullptr; 2892 } 2893 2894 bool NeedsUnwrappedLine = false; 2895 keepAncestorBraces(); 2896 2897 FormatToken *IfLeftBrace = nullptr; 2898 IfStmtKind IfBlockKind = IfStmtKind::NotIf; 2899 2900 if (isBlockBegin(*FormatTok)) { 2901 FormatTok->setFinalizedType(TT_ControlStatementLBrace); 2902 IfLeftBrace = FormatTok; 2903 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2904 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2905 /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind); 2906 setPreviousRBraceType(TT_ControlStatementRBrace); 2907 if (Style.BraceWrapping.BeforeElse) 2908 addUnwrappedLine(); 2909 else 2910 NeedsUnwrappedLine = true; 2911 } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) { 2912 addUnwrappedLine(); 2913 } else { 2914 parseUnbracedBody(); 2915 } 2916 2917 if (Style.RemoveBracesLLVM) { 2918 assert(!NestedTooDeep.empty()); 2919 KeepIfBraces = KeepIfBraces || 2920 (IfLeftBrace && !IfLeftBrace->MatchingParen) || 2921 NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly || 2922 IfBlockKind == IfStmtKind::IfElseIf; 2923 } 2924 2925 bool KeepElseBraces = KeepIfBraces; 2926 FormatToken *ElseLeftBrace = nullptr; 2927 IfStmtKind Kind = IfStmtKind::IfOnly; 2928 2929 if (FormatTok->is(tok::kw_else)) { 2930 if (Style.RemoveBracesLLVM) { 2931 NestedTooDeep.back() = false; 2932 Kind = IfStmtKind::IfElse; 2933 } 2934 nextToken(); 2935 handleAttributes(); 2936 if (isBlockBegin(*FormatTok)) { 2937 const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if); 2938 FormatTok->setFinalizedType(TT_ElseLBrace); 2939 ElseLeftBrace = FormatTok; 2940 CompoundStatementIndenter Indenter(this, Style, Line->Level); 2941 IfStmtKind ElseBlockKind = IfStmtKind::NotIf; 2942 FormatToken *IfLBrace = 2943 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2944 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind); 2945 
setPreviousRBraceType(TT_ElseRBrace); 2946 if (FormatTok->is(tok::kw_else)) { 2947 KeepElseBraces = KeepElseBraces || 2948 ElseBlockKind == IfStmtKind::IfOnly || 2949 ElseBlockKind == IfStmtKind::IfElseIf; 2950 } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) { 2951 KeepElseBraces = true; 2952 assert(ElseLeftBrace->MatchingParen); 2953 markOptionalBraces(ElseLeftBrace); 2954 } 2955 addUnwrappedLine(); 2956 } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) { 2957 const FormatToken *Previous = Tokens->getPreviousToken(); 2958 assert(Previous); 2959 const bool IsPrecededByComment = Previous->is(tok::comment); 2960 if (IsPrecededByComment) { 2961 addUnwrappedLine(); 2962 ++Line->Level; 2963 } 2964 bool TooDeep = true; 2965 if (Style.RemoveBracesLLVM) { 2966 Kind = IfStmtKind::IfElseIf; 2967 TooDeep = NestedTooDeep.pop_back_val(); 2968 } 2969 ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces); 2970 if (Style.RemoveBracesLLVM) 2971 NestedTooDeep.push_back(TooDeep); 2972 if (IsPrecededByComment) 2973 --Line->Level; 2974 } else { 2975 parseUnbracedBody(/*CheckEOF=*/true); 2976 } 2977 } else { 2978 KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse; 2979 if (NeedsUnwrappedLine) 2980 addUnwrappedLine(); 2981 } 2982 2983 if (!Style.RemoveBracesLLVM) 2984 return nullptr; 2985 2986 assert(!NestedTooDeep.empty()); 2987 KeepElseBraces = KeepElseBraces || 2988 (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || 2989 NestedTooDeep.back(); 2990 2991 NestedTooDeep.pop_back(); 2992 2993 if (!KeepIfBraces && !KeepElseBraces) { 2994 markOptionalBraces(IfLeftBrace); 2995 markOptionalBraces(ElseLeftBrace); 2996 } else if (IfLeftBrace) { 2997 FormatToken *IfRightBrace = IfLeftBrace->MatchingParen; 2998 if (IfRightBrace) { 2999 assert(IfRightBrace->MatchingParen == IfLeftBrace); 3000 assert(!IfLeftBrace->Optional); 3001 assert(!IfRightBrace->Optional); 3002 IfLeftBrace->MatchingParen = nullptr; 3003 IfRightBrace->MatchingParen = nullptr; 3004 
// NOTE(review): the lines down to the first closing brace at column 0 are the
// tail of a definition that starts before this chunk; reproduced unchanged.
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}

/// Parses a 'try' / '__try' statement together with every handler that
/// follows it.
///
/// \pre The current token is 'try' or '__try'.
///
/// Handles an optional function-try-block initializer list introduced by ':',
/// an optional parenthesized resource list when the language is Java, the
/// try block itself, and then any sequence of 'catch', '__except',
/// '__finally', 'finally' (Java / JavaScript only) and Objective-C
/// '@catch' / '@finally' handlers.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  // Set when a block was just parsed without ending the line; consumed at the
  // very end of this function.
  bool NeedsUnwrappedLine = false;
  bool HasCtorInitializer = false;
  if (FormatTok->is(tok::colon)) {
    auto *Colon = FormatTok;
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    if (FormatTok->is(tok::identifier)) {
      HasCtorInitializer = true;
      Colon->setFinalizedType(TT_CtorInitializerColon);
    }

    // In case identifiers were removed by clang-tidy, what might follow is
    // multiple commas in sequence - before the first identifier.
    while (FormatTok->is(tok::comma))
      nextToken();

    // Each initializer is an identifier followed by either a parenthesized or
    // a braced argument list.
    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }

      // In case identifiers were removed by clang-tidy, what might follow is
      // multiple commas in sequence - after the first identifier.
      while (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
    parseParens();

  keepAncestorBraces();

  if (FormatTok->is(tok::l_brace)) {
    // With a constructor initializer list in front, this brace opens the
    // function body.
    if (HasCtorInitializer)
      FormatTok->setFinalizedType(TT_FunctionLBrace);
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (FormatTok->isNot(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // One iteration per handler; the loop ends at the first token that does not
  // start a handler.
  while (true) {
    // Skip the '@' of an Objective-C '@catch' / '@finally'.
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
      break;
    }
    nextToken();
    // Advance to the handler's opening brace, parsing any parenthesized
    // section on the way.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        // Malformed handler: give up. This early exit performs the same
        // NestedTooDeep pop as the regular exit at the end of the function.
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    Line->MustBeDeclaration = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

/// Parses the braced body shared by namespace and export blocks and ends the
/// current line afterwards.
///
/// \param AddLevels Forwarded to parseBlock(). When it is zero and the brace
/// style is Whitesmiths, the line level is raised by one for the duration of
/// the block and restored afterwards.
void UnwrappedLineParser::parseNamespaceOrExportBlock(unsigned AddLevels) {
  bool ManageWhitesmithsBraces =
      AddLevels == 0u && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

  // If we're in Whitesmiths mode, indent the brace if we're not indenting
  // the whole block.
  if (ManageWhitesmithsBraces)
    ++Line->Level;

  // Munch the semicolon after the block. This is more common than one would
  // think. Putting the semicolon into its own line is very ugly.
  parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
             /*KeepBraces=*/true, /*IfKind=*/nullptr, ManageWhitesmithsBraces);

  addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

  if (ManageWhitesmithsBraces)
    --Line->Level;
}

/// Parses a namespace definition that starts with either the 'namespace'
/// keyword or a token typed TT_NamespaceMacro.
///
/// \pre The current token is 'namespace' or a namespace macro.
///
/// If no '{' is found after the name, nothing further is consumed here (see
/// the FIXME at the end).
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    // Consume everything that may appear between the keyword and the opening
    // brace; bracketed and parenthesized parts are parsed as a unit.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period, tok::l_paren) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        nextToken();
    }
  }
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_NamespaceLBrace);

    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // The body gets one extra level for NI_All, or for NI_Inner when
    // DeclarationScopeStack holds more than one entry.
    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    parseNamespaceOrExportBlock(AddLevels);
  }
  // FIXME: Add error handling.
}

/// Parses the body of an export block, adding one level when
/// Style.IndentExportBlock is set and none otherwise.
void UnwrappedLineParser::parseCppExportBlock() {
  parseNamespaceOrExportBlock(/*AddLevels=*/Style.IndentExportBlock ? 1 : 0);
}

/// Parses what follows a 'new' keyword. Only C# and Java get special
/// handling; for every other language just the keyword is consumed.
///
/// \pre The current token is 'new'.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    do {
      // Handle constructor invocation, e.g. `new(field: value)`.
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

/// Parses the body of a loop, which is either a braced block or a single
/// unbraced statement.
///
/// \param KeepBraces Forwarded to parseBlock(). When false, the left brace may
/// additionally be passed to markOptionalBraces(), and the last NestedTooDeep
/// entry is popped before returning.
/// \param WrapRightBrace Whether to end the current line after a braced body.
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
  keepAncestorBraces();

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    FormatToken *LeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepBraces);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (!KeepBraces) {
      assert(!NestedTooDeep.empty());
      if (!NestedTooDeep.back())
        markOptionalBraces(LeftBrace);
    }
    if (WrapRightBrace)
      addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  // NOTE(review): presumably balances an entry added by keepAncestorBraces();
  // that helper is not visible in this chunk - confirm.
  if (!KeepBraces)
    NestedTooDeep.pop_back();
}

/// Parses a 'for' or 'while' loop, a for-each macro, or (for Verilog) one of
/// the procedural keywords listed in the assertion below.
///
/// \param HasParens Whether a '(' following the keyword should be parsed as
/// the loop's parenthesized header.
void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
  assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
                              Keywords.kw_always_ff, Keywords.kw_always_latch,
                              Keywords.kw_final, Keywords.kw_initial,
                              Keywords.kw_foreach, Keywords.kw_forever,
                              Keywords.kw_repeat))) &&
         "'for', 'while' or foreach macro expected");
  // Braces are kept unless Style.RemoveBracesLLVM is set and the loop keyword
  // is a plain 'for' or 'while'.
  const bool KeepBraces = !Style.RemoveBracesLLVM ||
                          !FormatTok->isOneOf(tok::kw_for, tok::kw_while);

  nextToken();
  // JS' for await ( ...
  if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
    nextToken();
  if (IsCpp && FormatTok->is(tok::kw_co_await))
    nextToken();
  if (HasParens && FormatTok->is(tok::l_paren)) {
    // The type is only set for Verilog basically because we were afraid to
    // change the existing behavior for loops. See the discussion on D121756 for
    // details.
    if (Style.isVerilog())
      FormatTok->setFinalizedType(TT_ConditionLParen);
    parseParens();
  }

  if (Style.isVerilog()) {
    // Event control.
    parseVerilogSensitivityList();
  } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
             Tokens->getPreviousToken()->is(tok::r_paren)) {
    // Empty loop body: a ';' directly after the closing paren. Consume it and
    // end the line without parsing a body.
    nextToken();
    addUnwrappedLine();
    return;
  }

  handleAttributes();
  parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
}

/// Parses a do-while statement: the body first, then the trailing 'while'
/// part if one is present.
///
/// \pre The current token is 'do'.
void UnwrappedLineParser::parseDoWhile() {
  assert(FormatTok->is(tok::kw_do) && "'do' expected");
  nextToken();

  parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);

  // FIXME: Add error handling.
  if (FormatTok->isNot(tok::kw_while)) {
    addUnwrappedLine();
    return;
  }

  FormatTok->setFinalizedType(TT_DoWhile);

  // If in Whitesmiths mode, the line with the while() needs to be indented
  // to the same level as the block.
  if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  nextToken();
  parseStructuralElement();
}

/// Parses a label, starting at the token that terminates it (that token is
/// consumed first), together with a directly following braced block if any.
///
/// \param LeftAlignLabel When true the label's line is put at level 0;
/// otherwise its level is lowered by one where the condition below allows it.
/// The previous level is restored once the label's line has been added.
void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
  nextToken();
  unsigned OldLineLevel = Line->Level;

  if (LeftAlignLabel)
    Line->Level = 0;
  else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
    --Line->Level;

  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
      FormatTok->is(tok::l_brace)) {

    CompoundStatementIndenter Indenter(this, Line->Level,
                                       Style.BraceWrapping.AfterCaseLabel,
                                       Style.BraceWrapping.IndentBraces);
    parseBlock();
    // A 'break' directly after the block is parsed as part of this label.
    if (FormatTok->is(tok::kw_break)) {
      if (Style.BraceWrapping.AfterControlStatement ==
          FormatStyle::BWACS_Always) {
        addUnwrappedLine();
        if (!Style.IndentCaseBlocks &&
            Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
          ++Line->Level;
        }
      }
      parseStructuralElement();
    }
    addUnwrappedLine();
  } else {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  Line->Level = OldLineLevel;
  if (FormatTok->isNot(tok::l_brace)) {
    parseStructuralElement();
    addUnwrappedLine();
  }
}

/// Parses a 'case' label up to and including its terminating ':' (or, for
/// Java, '->') and then hands over to parseLabel().
///
/// \pre The current token is 'case'.
void UnwrappedLineParser::parseCaseLabel() {
  assert(FormatTok->is(tok::kw_case) && "'case' expected");
  auto *Case = FormatTok;

  // FIXME: fix handling of complex expressions here.
  do {
    nextToken();
    if (FormatTok->is(tok::colon)) {
      FormatTok->setFinalizedType(TT_CaseLabelColon);
      break;
    }
    if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) {
      // Arrow form: type both the arrow and the 'case' keyword itself.
      FormatTok->setFinalizedType(TT_CaseLabelArrow);
      Case->setFinalizedType(TT_SwitchExpressionLabel);
      break;
    }
  } while (!eof());
  parseLabel();
}

/// Parses a 'switch' with its optional parenthesized condition and its body.
///
/// \param IsExpr When true the braced body is typed
/// TT_SwitchExpressionLBrace and parsed with parseChildBlock(), and no line
/// is ended after it; otherwise it is parsed as a regular block followed by
/// a line end.
///
/// \pre The current token is 'switch'.
void UnwrappedLineParser::parseSwitch(bool IsExpr) {
  assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
  nextToken();
  if (FormatTok->is(tok::l_paren))
    parseParens();

  keepAncestorBraces();

  if (FormatTok->is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace
                                       : TT_ControlStatementLBrace);
    if (IsExpr)
      parseChildBlock();
    else
      parseBlock();
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (!IsExpr)
      addUnwrappedLine();
  } else {
    // No braces: parse a single structural element one level deeper.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();
}

// Operators that can follow a C variable.
/// Returns true for the token kinds enumerated below and false for every
/// other kind. Used by parseAccessSpecifier() to decide whether a keyword is
/// actually being used as an ordinary identifier.
static bool isCOperatorFollowingVar(tok::TokenKind Kind) {
  switch (Kind) {
  case tok::ampamp:
  case tok::ampequal:
  case tok::arrow:
  case tok::caret:
  case tok::caretequal:
  case tok::comma:
  case tok::ellipsis:
  case tok::equal:
  case tok::equalequal:
  case tok::exclaim:
  case tok::exclaimequal:
  case tok::greater:
  case tok::greaterequal:
  case tok::greatergreater:
  case tok::greatergreaterequal:
  case tok::l_paren:
  case tok::l_square:
  case tok::less:
  case tok::lessequal:
  case tok::lessless:
  case tok::lesslessequal:
  case tok::minus:
  case tok::minusequal:
  case tok::minusminus:
  case tok::percent:
  case tok::percentequal:
  case tok::period:
  case tok::pipe:
  case tok::pipeequal:
  case tok::pipepipe:
  case tok::plus:
  case tok::plusequal:
  case tok::plusplus:
  case tok::question:
  case tok::r_brace:
  case tok::r_paren:
  case tok::r_square:
  case tok::semi:
  case tok::slash:
  case tok::slashequal:
  case tok::star:
  case tok::starequal:
    return true;
  default:
    return false;
  }
}

/// Parses a token that may be an access specifier. If it is followed by '::'
/// or by a token accepted by isCOperatorFollowingVar(), the candidate is
/// re-kinded as a plain identifier instead and the line is left open.
void UnwrappedLineParser::parseAccessSpecifier() {
  FormatToken *AccessSpecifierCandidate = FormatTok;
  nextToken();
  // Understand Qt's slots.
  if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
    nextToken();
  // Otherwise, we don't know what it is, and we'd better keep the next token.
  if (FormatTok->is(tok::colon)) {
    nextToken();
    addUnwrappedLine();
  } else if (FormatTok->isNot(tok::coloncolon) &&
             !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
    // Not a variable name nor namespace name.
    addUnwrappedLine();
  } else if (AccessSpecifierCandidate) {
    // Consider the access specifier to be a C identifier.
    AccessSpecifierCandidate->Tok.setKind(tok::identifier);
  }
}

/// \brief Parses a requires, decides if it is a clause or an expression.
/// \pre The current token has to be the requires keyword.
/// \returns true if it parsed a clause.
///
/// The decision is made in three steps: the token right after the keyword,
/// then the token before the keyword, and finally a bounded lookahead into
/// the parenthesized part.
bool UnwrappedLineParser::parseRequires() {
  assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
  auto RequiresToken = FormatTok;

  // We try to guess if it is a requires clause, or a requires expression. For
  // that we first consume the keyword and check the next token.
  nextToken();

  switch (FormatTok->Tok.getKind()) {
  case tok::l_brace:
    // This can only be an expression, never a clause.
    parseRequiresExpression(RequiresToken);
    return false;
  case tok::l_paren:
    // Clauses and expression can start with a paren, it's unclear what we have.
    break;
  default:
    // All other tokens can only be a clause.
    parseRequiresClause(RequiresToken);
    return true;
  }

  // Looking forward we would have to decide if there are function declaration
  // like arguments to the requires expression:
  //   requires (T t) {
  // Or there is a constraint expression for the requires clause:
  //   requires (C<T> && ...

  // But first let's look behind.
  auto *PreviousNonComment = RequiresToken->getPreviousNonComment();

  if (!PreviousNonComment ||
      PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
    // If there is no token, or an expression left brace, we are a requires
    // clause within a requires expression.
    parseRequiresClause(RequiresToken);
    return true;
  }

  switch (PreviousNonComment->Tok.getKind()) {
  case tok::greater:
  case tok::r_paren:
  case tok::kw_noexcept:
  case tok::kw_const:
  case tok::amp:
    // This is a requires clause.
    parseRequiresClause(RequiresToken);
    return true;
  case tok::ampamp: {
    // This can be either:
    //   if (... && requires (T t) ...)
    // Or
    //   void member(...) && requires (C<T> ...
    // We check the one token before that for a const:
    //   void member(...) const && requires (C<T> ...
    auto PrevPrev = PreviousNonComment->getPreviousNonComment();
    if (PrevPrev && PrevPrev->is(tok::kw_const)) {
      parseRequiresClause(RequiresToken);
      return true;
    }
    break;
  }
  default:
    if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) {
      // This is a requires clause.
      parseRequiresClause(RequiresToken);
      return true;
    }
    // It's an expression.
    parseRequiresExpression(RequiresToken);
    return false;
  }

  // Now we look forward and try to check if the paren content is a parameter
  // list. The parameters can be cv-qualified and contain references or
  // pointers.
  // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
  // of stuff: typename, const, *, &, &&, ::, identifiers.

  // Remember where we are so that every exit below can rewind the token
  // source before the actual parse.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *NextToken = Tokens->getNextToken();
  int Lookahead = 0;
  auto PeekNext = [&Lookahead, &NextToken, this] {
    ++Lookahead;
    NextToken = Tokens->getNextToken();
  };

  bool FoundType = false;
  bool LastWasColonColon = false;
  int OpenAngles = 0;

  // The lookahead is capped at 50 peeked tokens.
  for (; Lookahead < 50; PeekNext()) {
    switch (NextToken->Tok.getKind()) {
    case tok::kw_volatile:
    case tok::kw_const:
    case tok::comma:
      // Outside of angle brackets any of these is taken as evidence of a
      // parameter list.
      if (OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    case tok::eof:
      // Break out of the loop.
      Lookahead = 50;
      break;
    case tok::coloncolon:
      LastWasColonColon = true;
      break;
    case tok::kw_decltype:
    case tok::identifier:
      // A second identifier that is not part of a qualified name and not
      // inside angle brackets looks like TYPE NAME.
      if (FoundType && !LastWasColonColon && OpenAngles == 0) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      FoundType = true;
      LastWasColonColon = false;
      break;
    case tok::less:
      ++OpenAngles;
      break;
    case tok::greater:
      --OpenAngles;
      break;
    default:
      if (NextToken->isTypeName(LangOpts)) {
        FormatTok = Tokens->setPosition(StoredPosition);
        parseRequiresExpression(RequiresToken);
        return false;
      }
      break;
    }
  }
  // This seems to be a complicated expression, just assume it's a clause.
  FormatTok = Tokens->setPosition(StoredPosition);
  parseRequiresClause(RequiresToken);
  return true;
}

/// \brief Parses a requires clause.
/// \param RequiresToken The requires keyword token, which starts this clause.
/// \pre We need to be on the next token after the requires keyword.
/// \sa parseRequiresExpression
///
/// Returns if it either has finished parsing the clause, or it detects, that
/// the clause is incorrect.
void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");

  // If there is no previous token, we are within a requires expression,
  // otherwise we will always have the template or function declaration in front
  // of it.
  bool InRequiresExpression =
      !RequiresToken->Previous ||
      RequiresToken->Previous->is(TT_RequiresExpressionLBrace);

  RequiresToken->setFinalizedType(InRequiresExpression
                                      ? TT_RequiresClauseInARequiresExpression
                                      : TT_RequiresClause);

  // NOTE: parseConstraintExpression is only ever called from this function.
  // It could be inlined into here.
  parseConstraintExpression();

  // Flag the last token consumed by the constraint expression.
  if (!InRequiresExpression)
    FormatTok->Previous->ClosesRequiresClause = true;
}

/// \brief Parses a requires expression.
/// \param RequiresToken The requires keyword token, which starts this clause.
/// \pre We need to be on the next token after the requires keyword.
/// \sa parseRequiresClause
///
/// Returns if it either has finished parsing the expression, or it detects,
/// that the expression is incorrect.
void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
  assert(FormatTok->getPreviousNonComment() == RequiresToken);
  assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");

  RequiresToken->setFinalizedType(TT_RequiresExpression);

  // Optional parameter list.
  if (FormatTok->is(tok::l_paren)) {
    FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
    parseParens();
  }

  // Requirement body.
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
    parseChildBlock();
  }
}

/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // Read the flag for this iteration and reset it for the next one; only
    // the cases below that explicitly set it again allow a lambda next.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Carry this iteration's lambda permission over unchanged.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      nextToken();
      if (FormatTok->isNot(tok::less))
        return;

      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}

/// Parses an enum declaration or definition, starting at the 'enum' keyword
/// (or at the current token when it is not 'enum', as with NS_ENUM).
///
/// \returns false when the tokens turned out not to be an enum (the cases
/// are listed at the individual 'return false' statements), true otherwise.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  if (IsCpp) {
    // Eat up enum class ...
    if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
      nextToken();
    while (FormatTok->is(tok::l_square))
      if (!handleCppAttributes())
        return false;
  }

  // Consume the enum's name and whatever else precedes the '{'.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square)) {
    if (Style.isVerilog()) {
      FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName);
      nextToken();
      // In Verilog the base type can have dimensions.
      while (FormatTok->is(tok::l_square))
        parseSquare();
    } else {
      nextToken();
    }
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (IsCpp && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  setPreviousRBraceType(TT_EnumRBrace);
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses a record via parseRecord() and, for Java, JavaScript and C#, also
/// consumes an optional ';' and ends the line.
///
/// \returns true if the line was ended here (Java, JavaScript, C#), false if
/// it was left open for the caller.
bool UnwrappedLineParser::parseStructLike() {
  // parseRecord falls through and does not yet add an unwrapped line as a
  // record declaration or definition can start a structural element.
  parseRecord();
  // This does not apply to Java, JavaScript and C#.
  if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
      Style.isCSharp()) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
    return true;
  }
  return false;
}

namespace {
// A class used to set and restore the Token position when peeking
// ahead in the token source.
class ScopedTokenPosition {
  // Position captured in the constructor and handed back to setPosition() in
  // the destructor.
  unsigned StoredPosition;
  FormatTokenSource *Tokens;

public:
  ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
    assert(Tokens && "Tokens expected to not be null");
    StoredPosition = Tokens->getPosition();
  }

  ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
};
} // namespace

// Look to see if we have [[ by looking ahead, if
// it's not then rewind to the original position.
// The token position is restored by AutoPosition on every return path,
// whatever the result.
bool UnwrappedLineParser::tryToParseSimpleAttribute() {
  ScopedTokenPosition AutoPosition(Tokens);
  FormatToken *Tok = Tokens->getNextToken();
  // We already read the first [ check for the second.
  if (Tok->isNot(tok::l_square))
    return false;
  // Double check that the attribute is just something
  // fairly simple.
  while (Tok->isNot(tok::eof)) {
    if (Tok->is(tok::r_square))
      break;
    Tok = Tokens->getNextToken();
  }
  if (Tok->is(tok::eof))
    return false;
  // The first ']' must be followed directly by a second one.
  Tok = Tokens->getNextToken();
  if (Tok->isNot(tok::r_square))
    return false;
  // A ';' right after the closing "]]" is rejected.
  Tok = Tokens->getNextToken();
  if (Tok->is(tok::semi))
    return false;
  return true;
}

/// Parses the body of a Java enum.
///
/// \pre The current token is the enum's opening '{'.
///
/// A lookahead first classifies the enum as simple or not. Simple enums are
/// parsed as a braced list; otherwise the constants are split into separate
/// lines and whatever follows the constants is parsed with parseLevel().
void UnwrappedLineParser::parseJavaEnumBody() {
  assert(FormatTok->is(tok::l_brace));
  const FormatToken *OpeningBrace = FormatTok;

  // Determine whether the enum is simple, i.e. does not have a semicolon or
  // constants with class bodies. Simple enums can be formatted like braced
  // lists, contracted to a single line, etc.
  unsigned StoredPosition = Tokens->getPosition();
  bool IsSimple = true;
  FormatToken *Tok = Tokens->getNextToken();
  while (Tok->isNot(tok::eof)) {
    if (Tok->is(tok::r_brace))
      break;
    if (Tok->isOneOf(tok::l_brace, tok::semi)) {
      IsSimple = false;
      break;
    }
    // FIXME: This will also mark enums with braces in the arguments to enum
    // constants as "not simple". This is probably fine in practice, though.
    Tok = Tokens->getNextToken();
  }
  // Rewind to where the lookahead started.
  FormatTok = Tokens->setPosition(StoredPosition);

  if (IsSimple) {
    nextToken();
    parseBracedList();
    addUnwrappedLine();
    return;
  }

  // Parse the body of a more complex enum.
  // First add a line for everything up to the "{".
  nextToken();
  addUnwrappedLine();
  ++Line->Level;

  // Parse the enum constants.
  while (!eof()) {
    if (FormatTok->is(tok::l_brace)) {
      // Parse the constant's class body.
      parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
    } else if (FormatTok->is(tok::l_paren)) {
      parseParens();
    } else if (FormatTok->is(tok::comma)) {
      nextToken();
      addUnwrappedLine();
    } else if (FormatTok->is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      break;
    } else if (FormatTok->is(tok::r_brace)) {
      addUnwrappedLine();
      break;
    } else {
      nextToken();
    }
  }

  // Parse the class body after the enum's ";" if any.
  parseLevel(OpeningBrace);
  nextToken();
  --Line->Level;
  addUnwrappedLine();
}

/// Parses a record-like declaration (class, struct, union, or another
/// record-introducing keyword such as 'interface'), starting at that keyword.
///
/// \param ParseAsExpr When true the body is parsed with parseChildBlock()
/// instead of as a regular block.
///
/// The function runs in three phases: the name part (identifiers, nested
/// name specifiers, attributes, macros), an optional part introduced by ':'
/// or '<', and finally the braced body. It returns early, without typing the
/// brace as a record brace, when the braces turn out to be a list
/// initialization.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  const FormatToken *ClassName = nullptr;
  bool IsDerived = false;
  // An identifier whose text differs from its upper-cased form, i.e. one
  // that is not written entirely in upper case.
  auto IsNonMacroIdentifier = [](const FormatToken *Tok) {
    return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper();
  };
  // JavaScript/TypeScript supports anonymous classes like:
  // a = class extends foo { }
  bool JSPastExtendsOrImplements = false;
  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw_alignas, tok::l_square) ||
         FormatTok->isAttribute() ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      JSPastExtendsOrImplements = true;
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    if (FormatTok->is(tok::l_square) && handleCppAttributes())
      continue;
    const auto *Previous = FormatTok;
    nextToken();
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      // We can have macros in between 'class' and the class name.
      if (!IsNonMacroIdentifier(Previous) ||
          // e.g. `struct macro(a) S { int i; };`
          Previous->Previous == &InitialToken) {
        parseParens();
      }
      break;
    case tok::coloncolon:
    case tok::hashhash:
      break;
    default:
      // The first qualifying identifier becomes the class name; later ones
      // do not replace it.
      if (!JSPastExtendsOrImplements && !ClassName &&
          Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro) &&
          Previous->TokenText != Previous->TokenText.upper()) {
        ClassName = Previous;
      }
    }
  }

  // Evaluated with the current token on a '{': true when the brace follows an
  // identifier other than the class name (and other than 'final') and can be
  // parsed as a braced list.
  auto IsListInitialization = [&] {
    if (!ClassName || IsDerived || JSPastExtendsOrImplements)
      return false;
    assert(FormatTok->is(tok::l_brace));
    const auto *Prev = FormatTok->getPreviousNonComment();
    assert(Prev);
    return Prev != ClassName && Prev->is(tok::identifier) &&
           Prev->isNot(Keywords.kw_final) && tryToParseBracedList();
  };

  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    int AngleNestingLevel = 0;
    do {
      if (FormatTok->is(tok::less))
        ++AngleNestingLevel;
      else if (FormatTok->is(tok::greater))
        --AngleNestingLevel;

      if (AngleNestingLevel == 0) {
        if (FormatTok->is(tok::colon)) {
          IsDerived = true;
        } else if (FormatTok->is(tok::identifier) &&
                   FormatTok->Previous->is(tok::coloncolon)) {
          ClassName = FormatTok;
        } else if (FormatTok->is(tok::l_paren) &&
                   IsNonMacroIdentifier(FormatTok->Previous)) {
          break;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        if (AngleNestingLevel == 0 && IsListInitialization())
          return;
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A brace that is not a braced list ends this phase; it is handled
        // as the record body below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous || (Previous->isNot(tok::r_paren) &&
                          !Previous->isTypeOrIdentifier(LangOpts))) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            continue;
        } else {
          parseSquare();
          continue;
        }
      }
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the record keyword to the token types used for its braces.
  auto GetBraceTypes =
      [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return {TT_ClassLBrace, TT_ClassRBrace};
    case tok::kw_struct:
      return {TT_StructLBrace, TT_StructRBrace};
    case tok::kw_union:
      return {TT_UnionLBrace, TT_UnionRBrace};
    default:
      // Useful for e.g. interface.
      return {TT_RecordLBrace, TT_RecordRBrace};
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    if (IsListInitialization())
      return;
    auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken);
    FormatTok->setFinalizedType(OpenBraceType);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
    setPreviousRBraceType(ClosingBraceType);
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}

/// Parses an Objective-C method declaration or definition: consumes tokens
/// until a ';' (declaration) or a braced body (definition) has been handled,
/// or the end of the input is reached.
///
/// \pre The current token is '(' or an identifier.
void UnwrappedLineParser::parseObjCMethod() {
  assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
         "'(' or identifier expected.");
  do {
    if (FormatTok->is(tok::semi)) {
      nextToken();
      addUnwrappedLine();
      return;
    } else if (FormatTok->is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseBlock();
      addUnwrappedLine();
      return;
    } else {
      nextToken();
    }
  } while (!eof());
}

/// Skips an Objective-C protocol list of the form '<' ... '>'.
///
/// \pre The current token is '<'.
///
/// Returns without consuming the offending token if a ';', a '{' or '@end'
/// is met before the closing '>'.
void UnwrappedLineParser::parseObjCProtocolList() {
  assert(FormatTok->is(tok::less) && "'<' expected.");
  do {
    nextToken();
    // Early exit in case someone forgot a close angle.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
        FormatTok->isObjCAtKeyword(tok::objc_end)) {
      return;
    }
  } while (!eof() && FormatTok->isNot(tok::greater));
  nextToken(); // Skip '>'.
}

void UnwrappedLineParser::parseObjCUntilAtEnd() {
  do {
    if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
      nextToken();
      addUnwrappedLine();
      break;
    }
    if (FormatTok->is(tok::l_brace)) {
      parseBlock();
      // In ObjC interfaces, nothing should be following the "}".
      addUnwrappedLine();
    } else if (FormatTok->is(tok::r_brace)) {
      // Ignore stray "}". parseStructuralElement doesn't consume them.
4227 nextToken(); 4228 addUnwrappedLine(); 4229 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 4230 nextToken(); 4231 parseObjCMethod(); 4232 } else { 4233 parseStructuralElement(); 4234 } 4235 } while (!eof()); 4236 } 4237 4238 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 4239 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 4240 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 4241 nextToken(); 4242 nextToken(); // interface name 4243 4244 // @interface can be followed by a lightweight generic 4245 // specialization list, then either a base class or a category. 4246 if (FormatTok->is(tok::less)) 4247 parseObjCLightweightGenerics(); 4248 if (FormatTok->is(tok::colon)) { 4249 nextToken(); 4250 nextToken(); // base class name 4251 // The base class can also have lightweight generics applied to it. 4252 if (FormatTok->is(tok::less)) 4253 parseObjCLightweightGenerics(); 4254 } else if (FormatTok->is(tok::l_paren)) { 4255 // Skip category, if present. 4256 parseParens(); 4257 } 4258 4259 if (FormatTok->is(tok::less)) 4260 parseObjCProtocolList(); 4261 4262 if (FormatTok->is(tok::l_brace)) { 4263 if (Style.BraceWrapping.AfterObjCDeclaration) 4264 addUnwrappedLine(); 4265 parseBlock(/*MustBeDeclaration=*/true); 4266 } 4267 4268 // With instance variables, this puts '}' on its own line. Without instance 4269 // variables, this ends the @interface line. 4270 addUnwrappedLine(); 4271 4272 parseObjCUntilAtEnd(); 4273 } 4274 4275 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4276 assert(FormatTok->is(tok::less)); 4277 // Unlike protocol lists, generic parameterizations support 4278 // nested angles: 4279 // 4280 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4281 // NSObject <NSCopying, NSSecureCoding> 4282 // 4283 // so we need to count how many open angles we have left. 
4284 unsigned NumOpenAngles = 1; 4285 do { 4286 nextToken(); 4287 // Early exit in case someone forgot a close angle. 4288 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4289 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4290 break; 4291 } 4292 if (FormatTok->is(tok::less)) { 4293 ++NumOpenAngles; 4294 } else if (FormatTok->is(tok::greater)) { 4295 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4296 --NumOpenAngles; 4297 } 4298 } while (!eof() && NumOpenAngles != 0); 4299 nextToken(); // Skip '>'. 4300 } 4301 4302 // Returns true for the declaration/definition form of @protocol, 4303 // false for the expression form. 4304 bool UnwrappedLineParser::parseObjCProtocol() { 4305 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4306 nextToken(); 4307 4308 if (FormatTok->is(tok::l_paren)) { 4309 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4310 return false; 4311 } 4312 4313 // The definition/declaration form, 4314 // @protocol Foo 4315 // - (int)someMethod; 4316 // @end 4317 4318 nextToken(); // protocol name 4319 4320 if (FormatTok->is(tok::less)) 4321 parseObjCProtocolList(); 4322 4323 // Check for protocol declaration. 4324 if (FormatTok->is(tok::semi)) { 4325 nextToken(); 4326 addUnwrappedLine(); 4327 return true; 4328 } 4329 4330 addUnwrappedLine(); 4331 parseObjCUntilAtEnd(); 4332 return true; 4333 } 4334 4335 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4336 bool IsImport = FormatTok->is(Keywords.kw_import); 4337 assert(IsImport || FormatTok->is(tok::kw_export)); 4338 nextToken(); 4339 4340 // Consume the "default" in "export default class/function". 4341 if (FormatTok->is(tok::kw_default)) 4342 nextToken(); 4343 4344 // Consume "async function", "function" and "default function", so that these 4345 // get parsed as free-standing JS functions, i.e. do not require a trailing 4346 // semicolon. 
4347 if (FormatTok->is(Keywords.kw_async)) 4348 nextToken(); 4349 if (FormatTok->is(Keywords.kw_function)) { 4350 nextToken(); 4351 return; 4352 } 4353 4354 // For imports, `export *`, `export {...}`, consume the rest of the line up 4355 // to the terminating `;`. For everything else, just return and continue 4356 // parsing the structural element, i.e. the declaration or expression for 4357 // `export default`. 4358 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4359 !FormatTok->isStringLiteral() && 4360 !(FormatTok->is(Keywords.kw_type) && 4361 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4362 return; 4363 } 4364 4365 while (!eof()) { 4366 if (FormatTok->is(tok::semi)) 4367 return; 4368 if (Line->Tokens.empty()) { 4369 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4370 // import statement should terminate. 4371 return; 4372 } 4373 if (FormatTok->is(tok::l_brace)) { 4374 FormatTok->setBlockKind(BK_Block); 4375 nextToken(); 4376 parseBracedList(); 4377 } else { 4378 nextToken(); 4379 } 4380 } 4381 } 4382 4383 void UnwrappedLineParser::parseStatementMacro() { 4384 nextToken(); 4385 if (FormatTok->is(tok::l_paren)) 4386 parseParens(); 4387 if (FormatTok->is(tok::semi)) 4388 nextToken(); 4389 addUnwrappedLine(); 4390 } 4391 4392 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4393 // consume things like a::`b.c[d:e] or a::* 4394 while (true) { 4395 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4396 tok::coloncolon, tok::hash) || 4397 Keywords.isVerilogIdentifier(*FormatTok)) { 4398 nextToken(); 4399 } else if (FormatTok->is(tok::l_square)) { 4400 parseSquare(); 4401 } else { 4402 break; 4403 } 4404 } 4405 } 4406 4407 void UnwrappedLineParser::parseVerilogSensitivityList() { 4408 if (FormatTok->isNot(tok::at)) 4409 return; 4410 nextToken(); 4411 // A block event expression has 2 at signs. 
4412 if (FormatTok->is(tok::at)) 4413 nextToken(); 4414 switch (FormatTok->Tok.getKind()) { 4415 case tok::star: 4416 nextToken(); 4417 break; 4418 case tok::l_paren: 4419 parseParens(); 4420 break; 4421 default: 4422 parseVerilogHierarchyIdentifier(); 4423 break; 4424 } 4425 } 4426 4427 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4428 unsigned AddLevels = 0; 4429 4430 if (FormatTok->is(Keywords.kw_clocking)) { 4431 nextToken(); 4432 if (Keywords.isVerilogIdentifier(*FormatTok)) 4433 nextToken(); 4434 parseVerilogSensitivityList(); 4435 if (FormatTok->is(tok::semi)) 4436 nextToken(); 4437 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4438 Keywords.kw_casez, Keywords.kw_randcase, 4439 Keywords.kw_randsequence)) { 4440 if (Style.IndentCaseLabels) 4441 AddLevels++; 4442 nextToken(); 4443 if (FormatTok->is(tok::l_paren)) { 4444 FormatTok->setFinalizedType(TT_ConditionLParen); 4445 parseParens(); 4446 } 4447 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4448 nextToken(); 4449 // The case header has no semicolon. 4450 } else { 4451 // "module" etc. 
4452 nextToken(); 4453 // all the words like the name of the module and specifiers like 4454 // "automatic" and the width of function return type 4455 while (true) { 4456 if (FormatTok->is(tok::l_square)) { 4457 auto Prev = FormatTok->getPreviousNonComment(); 4458 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4459 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4460 parseSquare(); 4461 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4462 FormatTok->isOneOf(tok::hash, tok::hashhash, tok::coloncolon, 4463 Keywords.kw_automatic, tok::kw_static)) { 4464 nextToken(); 4465 } else { 4466 break; 4467 } 4468 } 4469 4470 auto NewLine = [this]() { 4471 addUnwrappedLine(); 4472 Line->IsContinuation = true; 4473 }; 4474 4475 // package imports 4476 while (FormatTok->is(Keywords.kw_import)) { 4477 NewLine(); 4478 nextToken(); 4479 parseVerilogHierarchyIdentifier(); 4480 if (FormatTok->is(tok::semi)) 4481 nextToken(); 4482 } 4483 4484 // parameters and ports 4485 if (FormatTok->is(Keywords.kw_verilogHash)) { 4486 NewLine(); 4487 nextToken(); 4488 if (FormatTok->is(tok::l_paren)) { 4489 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4490 parseParens(); 4491 } 4492 } 4493 if (FormatTok->is(tok::l_paren)) { 4494 NewLine(); 4495 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4496 parseParens(); 4497 } 4498 4499 // extends and implements 4500 if (FormatTok->is(Keywords.kw_extends)) { 4501 NewLine(); 4502 nextToken(); 4503 parseVerilogHierarchyIdentifier(); 4504 if (FormatTok->is(tok::l_paren)) 4505 parseParens(); 4506 } 4507 if (FormatTok->is(Keywords.kw_implements)) { 4508 NewLine(); 4509 do { 4510 nextToken(); 4511 parseVerilogHierarchyIdentifier(); 4512 } while (FormatTok->is(tok::comma)); 4513 } 4514 4515 // Coverage event for cover groups. 
4516 if (FormatTok->is(tok::at)) { 4517 NewLine(); 4518 parseVerilogSensitivityList(); 4519 } 4520 4521 if (FormatTok->is(tok::semi)) 4522 nextToken(/*LevelDifference=*/1); 4523 addUnwrappedLine(); 4524 } 4525 4526 return AddLevels; 4527 } 4528 4529 void UnwrappedLineParser::parseVerilogTable() { 4530 assert(FormatTok->is(Keywords.kw_table)); 4531 nextToken(/*LevelDifference=*/1); 4532 addUnwrappedLine(); 4533 4534 auto InitialLevel = Line->Level++; 4535 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4536 FormatToken *Tok = FormatTok; 4537 nextToken(); 4538 if (Tok->is(tok::semi)) 4539 addUnwrappedLine(); 4540 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4541 Tok->setFinalizedType(TT_VerilogTableItem); 4542 } 4543 Line->Level = InitialLevel; 4544 nextToken(/*LevelDifference=*/-1); 4545 addUnwrappedLine(); 4546 } 4547 4548 void UnwrappedLineParser::parseVerilogCaseLabel() { 4549 // The label will get unindented in AnnotatingParser. If there are no leading 4550 // spaces, indent the rest here so that things inside the block will be 4551 // indented relative to things outside. We don't use parseLabel because we 4552 // don't know whether this colon is a label or a ternary expression at this 4553 // point. 4554 auto OrigLevel = Line->Level; 4555 auto FirstLine = CurrentLines->size(); 4556 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4557 ++Line->Level; 4558 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4559 --Line->Level; 4560 parseStructuralElement(); 4561 // Restore the indentation in both the new line and the line that has the 4562 // label. 
4563 if (CurrentLines->size() > FirstLine) 4564 (*CurrentLines)[FirstLine].Level = OrigLevel; 4565 Line->Level = OrigLevel; 4566 } 4567 4568 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4569 for (const auto &N : Line.Tokens) { 4570 if (N.Tok->MacroCtx) 4571 return true; 4572 for (const UnwrappedLine &Child : N.Children) 4573 if (containsExpansion(Child)) 4574 return true; 4575 } 4576 return false; 4577 } 4578 4579 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4580 if (Line->Tokens.empty()) 4581 return; 4582 LLVM_DEBUG({ 4583 if (!parsingPPDirective()) { 4584 llvm::dbgs() << "Adding unwrapped line:\n"; 4585 printDebugInfo(*Line); 4586 } 4587 }); 4588 4589 // If this line closes a block when in Whitesmiths mode, remember that 4590 // information so that the level can be decreased after the line is added. 4591 // This has to happen after the addition of the line since the line itself 4592 // needs to be indented. 4593 bool ClosesWhitesmithsBlock = 4594 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4595 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4596 4597 // If the current line was expanded from a macro call, we use it to 4598 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4599 // line and the unexpanded token stream. 4600 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4601 if (!Reconstruct) 4602 Reconstruct.emplace(Line->Level, Unexpanded); 4603 Reconstruct->addLine(*Line); 4604 4605 // While the reconstructed unexpanded lines are stored in the normal 4606 // flow of lines, the expanded lines are stored on the side to be analyzed 4607 // in an extra step. 
4608 CurrentExpandedLines.push_back(std::move(*Line)); 4609 4610 if (Reconstruct->finished()) { 4611 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4612 assert(!Reconstructed.Tokens.empty() && 4613 "Reconstructed must at least contain the macro identifier."); 4614 assert(!parsingPPDirective()); 4615 LLVM_DEBUG({ 4616 llvm::dbgs() << "Adding unexpanded line:\n"; 4617 printDebugInfo(Reconstructed); 4618 }); 4619 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4620 Lines.push_back(std::move(Reconstructed)); 4621 CurrentExpandedLines.clear(); 4622 Reconstruct.reset(); 4623 } 4624 } else { 4625 // At the top level we only get here when no unexpansion is going on, or 4626 // when conditional formatting led to unfinished macro reconstructions. 4627 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4628 CurrentLines->push_back(std::move(*Line)); 4629 } 4630 Line->Tokens.clear(); 4631 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4632 Line->FirstStartColumn = 0; 4633 Line->IsContinuation = false; 4634 Line->SeenDecltypeAuto = false; 4635 4636 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4637 --Line->Level; 4638 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4639 CurrentLines->append( 4640 std::make_move_iterator(PreprocessorDirectives.begin()), 4641 std::make_move_iterator(PreprocessorDirectives.end())); 4642 PreprocessorDirectives.clear(); 4643 } 4644 // Disconnect the current token from the last token on the previous line. 4645 FormatTok->Previous = nullptr; 4646 } 4647 4648 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4649 4650 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4651 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4652 FormatTok.NewlinesBefore > 0; 4653 } 4654 4655 // Checks if \p FormatTok is a line comment that continues the line comment 4656 // section on \p Line. 
4657 static bool 4658 continuesLineCommentSection(const FormatToken &FormatTok, 4659 const UnwrappedLine &Line, const FormatStyle &Style, 4660 const llvm::Regex &CommentPragmasRegex) { 4661 if (Line.Tokens.empty() || Style.ReflowComments != FormatStyle::RCS_Always) 4662 return false; 4663 4664 StringRef IndentContent = FormatTok.TokenText; 4665 if (FormatTok.TokenText.starts_with("//") || 4666 FormatTok.TokenText.starts_with("/*")) { 4667 IndentContent = FormatTok.TokenText.substr(2); 4668 } 4669 if (CommentPragmasRegex.match(IndentContent)) 4670 return false; 4671 4672 // If Line starts with a line comment, then FormatTok continues the comment 4673 // section if its original column is greater or equal to the original start 4674 // column of the line. 4675 // 4676 // Define the min column token of a line as follows: if a line ends in '{' or 4677 // contains a '{' followed by a line comment, then the min column token is 4678 // that '{'. Otherwise, the min column token of the line is the first token of 4679 // the line. 4680 // 4681 // If Line starts with a token other than a line comment, then FormatTok 4682 // continues the comment section if its original column is greater than the 4683 // original start column of the min column token of the line. 
4684 // 4685 // For example, the second line comment continues the first in these cases: 4686 // 4687 // // first line 4688 // // second line 4689 // 4690 // and: 4691 // 4692 // // first line 4693 // // second line 4694 // 4695 // and: 4696 // 4697 // int i; // first line 4698 // // second line 4699 // 4700 // and: 4701 // 4702 // do { // first line 4703 // // second line 4704 // int i; 4705 // } while (true); 4706 // 4707 // and: 4708 // 4709 // enum { 4710 // a, // first line 4711 // // second line 4712 // b 4713 // }; 4714 // 4715 // The second line comment doesn't continue the first in these cases: 4716 // 4717 // // first line 4718 // // second line 4719 // 4720 // and: 4721 // 4722 // int i; // first line 4723 // // second line 4724 // 4725 // and: 4726 // 4727 // do { // first line 4728 // // second line 4729 // int i; 4730 // } while (true); 4731 // 4732 // and: 4733 // 4734 // enum { 4735 // a, // first line 4736 // // second line 4737 // }; 4738 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4739 4740 // Scan for '{//'. If found, use the column of '{' as a min column for line 4741 // comment section continuation. 4742 const FormatToken *PreviousToken = nullptr; 4743 for (const UnwrappedLineNode &Node : Line.Tokens) { 4744 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4745 isLineComment(*Node.Tok)) { 4746 MinColumnToken = PreviousToken; 4747 break; 4748 } 4749 PreviousToken = Node.Tok; 4750 4751 // Grab the last newline preceding a token in this unwrapped line. 
4752 if (Node.Tok->NewlinesBefore > 0) 4753 MinColumnToken = Node.Tok; 4754 } 4755 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4756 MinColumnToken = PreviousToken; 4757 4758 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4759 MinColumnToken); 4760 } 4761 4762 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4763 bool JustComments = Line->Tokens.empty(); 4764 for (FormatToken *Tok : CommentsBeforeNextToken) { 4765 // Line comments that belong to the same line comment section are put on the 4766 // same line since later we might want to reflow content between them. 4767 // Additional fine-grained breaking of line comment sections is controlled 4768 // by the class BreakableLineCommentSection in case it is desirable to keep 4769 // several line comment sections in the same unwrapped line. 4770 // 4771 // FIXME: Consider putting separate line comment sections as children to the 4772 // unwrapped line instead. 4773 Tok->ContinuesLineCommentSection = 4774 continuesLineCommentSection(*Tok, *Line, Style, CommentPragmasRegex); 4775 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4776 addUnwrappedLine(); 4777 pushToken(Tok); 4778 } 4779 if (NewlineBeforeNext && JustComments) 4780 addUnwrappedLine(); 4781 CommentsBeforeNextToken.clear(); 4782 } 4783 4784 void UnwrappedLineParser::nextToken(int LevelDifference) { 4785 if (eof()) 4786 return; 4787 flushComments(isOnNewLine(*FormatTok)); 4788 pushToken(FormatTok); 4789 FormatToken *Previous = FormatTok; 4790 if (!Style.isJavaScript()) 4791 readToken(LevelDifference); 4792 else 4793 readTokenWithJavaScriptASI(); 4794 FormatTok->Previous = Previous; 4795 if (Style.isVerilog()) { 4796 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4797 // keywords like `begin`, we can't treat them the same as left braces 4798 // because some contexts require one of them. 
For example structs use 4799 // braces and if blocks use keywords, and a left brace can occur in an if 4800 // statement, but it is not a block. For keywords like `end`, we simply 4801 // treat them the same as right braces. 4802 if (Keywords.isVerilogEnd(*FormatTok)) 4803 FormatTok->Tok.setKind(tok::r_brace); 4804 } 4805 } 4806 4807 void UnwrappedLineParser::distributeComments( 4808 const ArrayRef<FormatToken *> &Comments, const FormatToken *NextTok) { 4809 // Whether or not a line comment token continues a line is controlled by 4810 // the method continuesLineCommentSection, with the following caveat: 4811 // 4812 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4813 // that each comment line from the trail is aligned with the next token, if 4814 // the next token exists. If a trail exists, the beginning of the maximal 4815 // trail is marked as a start of a new comment section. 4816 // 4817 // For example in this code: 4818 // 4819 // int a; // line about a 4820 // // line 1 about b 4821 // // line 2 about b 4822 // int b; 4823 // 4824 // the two lines about b form a maximal trail, so there are two sections, the 4825 // first one consisting of the single comment "// line about a" and the 4826 // second one consisting of the next two comments. 4827 if (Comments.empty()) 4828 return; 4829 bool ShouldPushCommentsInCurrentLine = true; 4830 bool HasTrailAlignedWithNextToken = false; 4831 unsigned StartOfTrailAlignedWithNextToken = 0; 4832 if (NextTok) { 4833 // We are skipping the first element intentionally. 
4834 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4835 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4836 HasTrailAlignedWithNextToken = true; 4837 StartOfTrailAlignedWithNextToken = i; 4838 } 4839 } 4840 } 4841 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4842 FormatToken *FormatTok = Comments[i]; 4843 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4844 FormatTok->ContinuesLineCommentSection = false; 4845 } else { 4846 FormatTok->ContinuesLineCommentSection = continuesLineCommentSection( 4847 *FormatTok, *Line, Style, CommentPragmasRegex); 4848 } 4849 if (!FormatTok->ContinuesLineCommentSection && 4850 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4851 ShouldPushCommentsInCurrentLine = false; 4852 } 4853 if (ShouldPushCommentsInCurrentLine) 4854 pushToken(FormatTok); 4855 else 4856 CommentsBeforeNextToken.push_back(FormatTok); 4857 } 4858 } 4859 4860 void UnwrappedLineParser::readToken(int LevelDifference) { 4861 SmallVector<FormatToken *, 1> Comments; 4862 bool PreviousWasComment = false; 4863 bool FirstNonCommentOnLine = false; 4864 do { 4865 FormatTok = Tokens->getNextToken(); 4866 assert(FormatTok); 4867 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, 4868 TT_ConflictAlternative)) { 4869 if (FormatTok->is(TT_ConflictStart)) 4870 conditionalCompilationStart(/*Unreachable=*/false); 4871 else if (FormatTok->is(TT_ConflictAlternative)) 4872 conditionalCompilationAlternative(); 4873 else if (FormatTok->is(TT_ConflictEnd)) 4874 conditionalCompilationEnd(); 4875 FormatTok = Tokens->getNextToken(); 4876 FormatTok->MustBreakBefore = true; 4877 FormatTok->MustBreakBeforeFinalized = true; 4878 } 4879 4880 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4881 const FormatToken &Tok, 4882 bool PreviousWasComment) { 4883 auto IsFirstOnLine = [](const FormatToken &Tok) { 4884 return Tok.HasUnescapedNewline || Tok.IsFirst; 4885 }; 4886 4887 // Consider preprocessor directives 
preceded by block comments as first 4888 // on line. 4889 if (PreviousWasComment) 4890 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4891 return IsFirstOnLine(Tok); 4892 }; 4893 4894 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4895 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4896 PreviousWasComment = FormatTok->is(tok::comment); 4897 4898 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4899 (!Style.isVerilog() || 4900 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4901 FirstNonCommentOnLine) { 4902 distributeComments(Comments, FormatTok); 4903 Comments.clear(); 4904 // If there is an unfinished unwrapped line, we flush the preprocessor 4905 // directives only after that unwrapped line was finished later. 4906 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4907 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4908 assert((LevelDifference >= 0 || 4909 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4910 "LevelDifference makes Line->Level negative"); 4911 Line->Level += LevelDifference; 4912 // Comments stored before the preprocessor directive need to be output 4913 // before the preprocessor directive, at the same level as the 4914 // preprocessor directive, as we consider them to apply to the directive. 
4915 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4916 PPBranchLevel > 0) { 4917 Line->Level += PPBranchLevel; 4918 } 4919 assert(Line->Level >= Line->UnbracedBodyLevel); 4920 Line->Level -= Line->UnbracedBodyLevel; 4921 flushComments(isOnNewLine(*FormatTok)); 4922 parsePPDirective(); 4923 PreviousWasComment = FormatTok->is(tok::comment); 4924 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4925 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4926 } 4927 4928 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4929 !Line->InPPDirective) { 4930 continue; 4931 } 4932 4933 if (FormatTok->is(tok::identifier) && 4934 Macros.defined(FormatTok->TokenText) && 4935 // FIXME: Allow expanding macros in preprocessor directives. 4936 !Line->InPPDirective) { 4937 FormatToken *ID = FormatTok; 4938 unsigned Position = Tokens->getPosition(); 4939 4940 // To correctly parse the code, we need to replace the tokens of the macro 4941 // call with its expansion. 4942 auto PreCall = std::move(Line); 4943 Line.reset(new UnwrappedLine); 4944 bool OldInExpansion = InExpansion; 4945 InExpansion = true; 4946 // We parse the macro call into a new line. 4947 auto Args = parseMacroCall(); 4948 InExpansion = OldInExpansion; 4949 assert(Line->Tokens.front().Tok == ID); 4950 // And remember the unexpanded macro call tokens. 4951 auto UnexpandedLine = std::move(Line); 4952 // Reset to the old line. 
4953 Line = std::move(PreCall); 4954 4955 LLVM_DEBUG({ 4956 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4957 if (Args) { 4958 llvm::dbgs() << "("; 4959 for (const auto &Arg : Args.value()) 4960 for (const auto &T : Arg) 4961 llvm::dbgs() << T->TokenText << " "; 4962 llvm::dbgs() << ")"; 4963 } 4964 llvm::dbgs() << "\n"; 4965 }); 4966 if (Macros.objectLike(ID->TokenText) && Args && 4967 !Macros.hasArity(ID->TokenText, Args->size())) { 4968 // The macro is either 4969 // - object-like, but we got argumnets, or 4970 // - overloaded to be both object-like and function-like, but none of 4971 // the function-like arities match the number of arguments. 4972 // Thus, expand as object-like macro. 4973 LLVM_DEBUG(llvm::dbgs() 4974 << "Macro \"" << ID->TokenText 4975 << "\" not overloaded for arity " << Args->size() 4976 << "or not function-like, using object-like overload."); 4977 Args.reset(); 4978 UnexpandedLine->Tokens.resize(1); 4979 Tokens->setPosition(Position); 4980 nextToken(); 4981 assert(!Args && Macros.objectLike(ID->TokenText)); 4982 } 4983 if ((!Args && Macros.objectLike(ID->TokenText)) || 4984 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4985 // Next, we insert the expanded tokens in the token stream at the 4986 // current position, and continue parsing. 
4987 Unexpanded[ID] = std::move(UnexpandedLine); 4988 SmallVector<FormatToken *, 8> Expansion = 4989 Macros.expand(ID, std::move(Args)); 4990 if (!Expansion.empty()) 4991 FormatTok = Tokens->insertTokens(Expansion); 4992 4993 LLVM_DEBUG({ 4994 llvm::dbgs() << "Expanded: "; 4995 for (const auto &T : Expansion) 4996 llvm::dbgs() << T->TokenText << " "; 4997 llvm::dbgs() << "\n"; 4998 }); 4999 } else { 5000 LLVM_DEBUG({ 5001 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 5002 << "\", because it was used "; 5003 if (Args) 5004 llvm::dbgs() << "with " << Args->size(); 5005 else 5006 llvm::dbgs() << "without"; 5007 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 5008 }); 5009 Tokens->setPosition(Position); 5010 FormatTok = ID; 5011 } 5012 } 5013 5014 if (FormatTok->isNot(tok::comment)) { 5015 distributeComments(Comments, FormatTok); 5016 Comments.clear(); 5017 return; 5018 } 5019 5020 Comments.push_back(FormatTok); 5021 } while (!eof()); 5022 5023 distributeComments(Comments, nullptr); 5024 Comments.clear(); 5025 } 5026 5027 namespace { 5028 template <typename Iterator> 5029 void pushTokens(Iterator Begin, Iterator End, 5030 SmallVectorImpl<FormatToken *> &Into) { 5031 for (auto I = Begin; I != End; ++I) { 5032 Into.push_back(I->Tok); 5033 for (const auto &Child : I->Children) 5034 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 5035 } 5036 } 5037 } // namespace 5038 5039 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 5040 UnwrappedLineParser::parseMacroCall() { 5041 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 5042 assert(Line->Tokens.empty()); 5043 nextToken(); 5044 if (FormatTok->isNot(tok::l_paren)) 5045 return Args; 5046 unsigned Position = Tokens->getPosition(); 5047 FormatToken *Tok = FormatTok; 5048 nextToken(); 5049 Args.emplace(); 5050 auto ArgStart = std::prev(Line->Tokens.end()); 5051 5052 int Parens = 0; 5053 do { 5054 switch (FormatTok->Tok.getKind()) { 
5055 case tok::l_paren: 5056 ++Parens; 5057 nextToken(); 5058 break; 5059 case tok::r_paren: { 5060 if (Parens > 0) { 5061 --Parens; 5062 nextToken(); 5063 break; 5064 } 5065 Args->push_back({}); 5066 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5067 nextToken(); 5068 return Args; 5069 } 5070 case tok::comma: { 5071 if (Parens > 0) { 5072 nextToken(); 5073 break; 5074 } 5075 Args->push_back({}); 5076 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5077 nextToken(); 5078 ArgStart = std::prev(Line->Tokens.end()); 5079 break; 5080 } 5081 default: 5082 nextToken(); 5083 break; 5084 } 5085 } while (!eof()); 5086 Line->Tokens.resize(1); 5087 Tokens->setPosition(Position); 5088 FormatTok = Tok; 5089 return {}; 5090 } 5091 5092 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 5093 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 5094 if (MustBreakBeforeNextToken) { 5095 Line->Tokens.back().Tok->MustBreakBefore = true; 5096 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 5097 MustBreakBeforeNextToken = false; 5098 } 5099 } 5100 5101 } // end namespace format 5102 } // end namespace clang 5103