//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//

#include "UnwrappedLineParser.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "FormatTokenSource.h"
#include "Macros.h"
#include "TokenAnnotator.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <utility>

#define DEBUG_TYPE "format-parser"

namespace clang {
namespace format {

namespace {

/// Writes a textual dump of \p Line to \p OS.
///
/// The dump starts with \p Prefix and a header holding the line's level, its
/// first start column and a " MACRO" marker when the line is in a preprocessor
/// directive. Each token follows as `name[T=<type>, OC=<column>, "<text>"]`.
/// Child lines attached to a token are dumped recursively, each on its own
/// line, with \p Prefix extended.
///
/// \param PrintText Not read anywhere in this function body.
void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
               StringRef Prefix = "", bool PrintText = false) {
  OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
     << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  // Set after children were printed: the next token must re-emit the prefix
  // because the children ended the current output line.
  bool NewLine = false;
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    if (NewLine) {
      OS << Prefix;
      NewLine = false;
    }
    OS << I->Tok->Tok.getName() << "["
       << "T=" << (unsigned)I->Tok->getType()
       << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
       << "\"] ";
    for (SmallVectorImpl<UnwrappedLine>::const_iterator
             CI = I->Children.begin(),
             CE = I->Children.end();
         CI != CE; ++CI) {
      OS << "\n";
      printLine(OS, *CI, (Prefix + " ").str());
      NewLine = true;
    }
  }
  // A recursive call already terminated the output line; otherwise do it here.
  if (!NewLine)
    OS << "\n";
}

/// Dumps \p Line to the LLVM debug stream.
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  printLine(llvm::dbgs(), Line);
}

/// Scope guard for the "must be declaration" state of a line.
///
/// The constructor sets \c Line.MustBeDeclaration and pushes the same value
/// onto \c Stack. The destructor pops that entry and sets
/// \c Line.MustBeDeclaration to the new top of the stack, or to \c true when
/// the stack has become empty.
class ScopedDeclarationState {
public:
  ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
                         bool MustBeDeclaration)
      : Line(Line), Stack(Stack) {
    Line.MustBeDeclaration = MustBeDeclaration;
    Stack.push_back(MustBeDeclaration);
  }
  ~ScopedDeclarationState() {
    Stack.pop_back();
    if (!Stack.empty())
      Line.MustBeDeclaration = Stack.back();
    else
      Line.MustBeDeclaration = true;
  }

private:
  UnwrappedLine &Line;
  llvm::BitVector &Stack;
};

} // end anonymous namespace

/// Streams the same dump as printLine() into a \c std::ostream by wrapping it
/// in an \c llvm::raw_os_ostream.
std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
  llvm::raw_os_ostream OS(Stream);
  printLine(OS, Line);
  return Stream;
}

/// Scope guard that swaps in a fresh current line for the parser and restores
/// the previous one on destruction.
///
/// On construction the parser's \c CurrentLines is redirected either to
/// \c PreprocessorDirectives (when \p SwitchToPreprocessorLines is set) or,
/// if the current line already has tokens, to the children of its last token.
/// The current line is moved aside and replaced by a new empty line that
/// copies Level, PPLevel, InPPDirective, InMacroBody and UnbracedBodyLevel.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    else if (!Parser.Line->Tokens.empty())
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
    PreBlockLine = std::move(Parser.Line);
    Parser.Line = std::make_unique<UnwrappedLine>();
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->PPLevel = PreBlockLine->PPLevel;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
    Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
    Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
  }

  ~ScopedLineState() {
    // Flush whatever is still pending on the temporary line before it is
    // discarded.
    if (!Parser.Line->Tokens.empty())
      Parser.addUnwrappedLine();
    assert(Parser.Line->Tokens.empty());
    Parser.Line = std::move(PreBlockLine);
    // This check must precede the restore of CurrentLines below.
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
      Parser.MustBreakBeforeNextToken = true;
    Parser.CurrentLines = OriginalLines;
  }

private:
  UnwrappedLineParser &Parser;

  // The line that was current when this guard was constructed.
  std::unique_ptr<UnwrappedLine> PreBlockLine;
  // The value of Parser.CurrentLines when this guard was constructed.
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
};

/// Scope guard around the parsing of a compound statement.
///
/// On construction it calls \c Parser->addUnwrappedLine() when the brace is to
/// be wrapped and increments \c LineLevel when the brace is to be indented.
/// On destruction \c LineLevel is reset to the value it had at construction.
class CompoundStatementIndenter {
public:
  /// Takes the wrap/indent decisions from
  /// \c Style.BraceWrapping.AfterControlStatement and
  /// \c Style.BraceWrapping.IndentBraces.
  CompoundStatementIndenter(UnwrappedLineParser *Parser,
                            const FormatStyle &Style, unsigned &LineLevel)
      : CompoundStatementIndenter(Parser, LineLevel,
                                  Style.BraceWrapping.AfterControlStatement,
                                  Style.BraceWrapping.IndentBraces) {}
  CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
                            bool WrapBrace, bool IndentBrace)
      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
    if (WrapBrace)
      Parser->addUnwrappedLine();
    if (IndentBrace)
      ++LineLevel;
  }
  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }

private:
  unsigned &LineLevel;
  unsigned OldLineLevel;
};

/// Constructs a parser over \p Tokens that reports lines to \p Callback.
///
/// Include guard detection starts out as rejected when
/// \c Style.IndentPPDirectives is \c PPDIS_None, and as initialized otherwise.
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
      LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
  assert(IsCpp == LangOpts.CXXOperatorNames);
}

/// Resets the per-run parsing state so that parse() can start another pass
/// over the same tokens.
///
/// \c PPLevelBranchIndex and \c PPLevelBranchCount are not touched here;
/// parse() uses them across runs.
void UnwrappedLineParser::reset() {
  PPBranchLevel = -1;
  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
                     ? IG_Rejected
                     : IG_Inited;
  IncludeGuardToken = nullptr;
  Line.reset(new UnwrappedLine);
  CommentsBeforeNextToken.clear();
  FormatTok = nullptr;
  MustBreakBeforeNextToken = false;
  IsDecltypeAutoFunction = false;
  PreprocessorDirectives.clear();
  CurrentLines = &Lines;
  DeclarationScopeStack.clear();
  NestedTooDeep.clear();
  NestedLambdas.clear();
  PPStack.clear();
  Line->FirstStartColumn = FirstStartColumn;

  // Only when the previous run recorded unexpanded macro calls is the macro
  // context of every token reset.
  if (!Unexpanded.empty())
    for (FormatToken *Token : AllTokens)
      Token->MacroCtx.reset();
  CurrentExpandedLines.clear();
  ExpandedLines.clear();
  Unexpanded.clear();
  InExpansion = false;
  Reconstruct.reset();
}

/// Parses all tokens and hands the resulting unwrapped lines to the callback.
///
/// The token stream is parsed repeatedly: each iteration resets the state,
/// parses the whole file and reports the lines, then advances
/// \c PPLevelBranchIndex. The loop ends once \c PPLevelBranchIndex is empty.
void UnwrappedLineParser::parse() {
  IndexedTokenSource TokenSource(AllTokens);
  Line->FirstStartColumn = FirstStartColumn;
  do {
    LLVM_DEBUG(llvm::dbgs() << "----\n");
    reset();
    Tokens = &TokenSource;
    TokenSource.reset();

    readToken();
    parseFile();

    // If we found an include guard then all preprocessor directives (other than
    // the guard) are over-indented by one.
    if (IncludeGuard == IG_Found) {
      for (auto &Line : Lines)
        if (Line.InPPDirective && Line.Level > 0)
          --Line.Level;
    }

    // Create line with eof token.
    assert(eof());
    pushToken(FormatTok);
    addUnwrappedLine();

    // In a first run, format everything with the lines containing macro calls
    // replaced by the expansion.
    if (!ExpandedLines.empty()) {
      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
      for (const auto &Line : Lines) {
        if (!Line.Tokens.empty()) {
          // Expanded lines are looked up by the first token of the line.
          auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
          if (it != ExpandedLines.end()) {
            for (const auto &Expanded : it->second) {
              LLVM_DEBUG(printDebugInfo(Expanded));
              Callback.consumeUnwrappedLine(Expanded);
            }
            continue;
          }
        }
        LLVM_DEBUG(printDebugInfo(Line));
        Callback.consumeUnwrappedLine(Line);
      }
      Callback.finishRun();
    }

    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
    for (const UnwrappedLine &Line : Lines) {
      LLVM_DEBUG(printDebugInfo(Line));
      Callback.consumeUnwrappedLine(Line);
    }
    Callback.finishRun();
    Lines.clear();
    // Drop the innermost levels whose branches have all been visited, then
    // advance the branch index of the innermost remaining level.
    while (!PPLevelBranchIndex.empty() &&
           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
    }
    if (!PPLevelBranchIndex.empty()) {
      ++PPLevelBranchIndex.back();
      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
    }
  } while (!PPLevelBranchIndex.empty());
}

/// Parses the top level of the file: a braced list body for TextProto, a
/// regular level otherwise. Pending comments and the last line are flushed
/// afterwards.
void UnwrappedLineParser::parseFile() {
  // The top-level context in a file always has declarations, except for pre-
  // processor directives and JavaScript files.
  bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (Style.Language == FormatStyle::LK_TextProto)
    parseBracedList();
  else
    parseLevel();
  // Make sure to format the remaining tokens.
  //
  // LK_TextProto is special since its top-level is parsed as the body of a
  // braced list, which does not necessarily have natural line separators such
  // as a semicolon. Comments after the last entry that have been determined to
  // not belong to that line, as in:
  //   key: value
  //   // endfile comment
  // do not have a chance to be put on a line of their own until this point.
  // Here we add this newline before end-of-file comments.
  if (Style.Language == FormatStyle::LK_TextProto &&
      !CommentsBeforeNextToken.empty()) {
    addUnwrappedLine();
  }
  flushComments(true);
  addUnwrappedLine();
}

/// Consumes tokens up to (not including) the next '{' or end of file. Each
/// \c where keyword starts a new unwrapped line and is handled by a recursive
/// call.
void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      return;
    default:
      if (FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
      break;
    }
  } while (!eof());
}

/// Consumes tokens through the ']' that balances one already-open '[' and then
/// ends the current unwrapped line. Nested square brackets are tracked; the
/// function also stops at end of file.
void UnwrappedLineParser::parseCSharpAttribute() {
  // Starts at 1: the caller visible in this file consumes the opening '['
  // before calling.
  int UnpairedSquareBrackets = 1;
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_square:
      nextToken();
      --UnpairedSquareBrackets;
      if (UnpairedSquareBrackets == 0) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::l_square:
      ++UnpairedSquareBrackets;
      nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// \returns true if the last finished line is in a preprocessor directive, or
/// if the previous token is a comment that is multiline or has at least one
/// newline before it.
bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
  if (!Lines.empty() && Lines.back().InPPDirective)
    return true;

  const FormatToken *Previous = Tokens->getPreviousToken();
  return Previous && Previous->is(tok::comment) &&
         (Previous->IsMultiline || Previous->NewlinesBefore > 0);
}

/// \brief Parses a level, that is, a sequence of structural elements.
/// Parsing runs until end of file, or returns at a closing brace when
/// \p OpeningBrace is non-null (the closing brace is not consumed then).
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Evaluated once on entry; when RemoveBracesLLVM is off this is simply true
  // and the token stream is not inspected.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    if (FormatTok->isAttribute()) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind Kind = FormatTok->Tok.getKind();
    if (FormatTok->is(TT_MacroBlockBegin))
      Kind = tok::l_brace;
    else if (FormatTok->is(TT_MacroBlockEnd))
      Kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. Once
    // HasDoWhile/HasLabel are set, nullptr is passed instead of their address.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (Kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
        if (tryToParseBracedList())
          continue;
        FormatTok->setFinalizedType(TT_BlockLBrace);
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // From here on every path returns without consuming the closing
        // brace; the return value says whether the block is "simple".
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // FormatTok may be a TT_MacroBlockEnd that was mapped to r_brace
        // above, hence the explicit isNot(tok::r_brace) check.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // Closing brace without an opening brace for this level: consume it as
      // a line of its own and keep parsing.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments, then rewind to the stored position.
      unsigned StoredPosition = Tokens->getPosition();
      auto *Next = Tokens->getNextNonComment();
      FormatTok = Tokens->setPosition(StoredPosition);
      if (!Next->isOneOf(tok::colon, tok::arrow)) {
        // default not followed by `:` or `->` is not a case label; treat it
        // like an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // The level is bumped at most once, on the first label of this level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}

/// Scans forward from the current '{' and assigns a block kind (block or
/// braced init list) to the braces it encounters. The token position is
/// restored before returning.
///
/// \param ExpectClassBody When set, a ';' after the closing brace of the
/// outermost brace pair does not by itself mark that pair as a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));

  do {
    auto *NextTok = Tokens->getNextNonComment();

    if (!Line->InMacroBody && !Style.isTableGen()) {
      // Skip PPDirective lines and comments.
      while (NextTok->is(tok::hash)) {
        NextTok = Tokens->getNextToken();
        if (NextTok->is(tok::pp_not_keyword))
          break;
        do {
          NextTok = Tokens->getNextToken();
        } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));

        while (NextTok->is(tok::comment))
          NextTok = Tokens->getNextToken();
      }
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide when the matching '{' has not been classified yet.
      if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else if (LBrace->isNot(TT_EnumLBrace)) {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBrace->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList =
              ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
                                               NextTok->is(tok::l_paren)));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }

          // Cpp macro definition body that is a nonempty braced list or block:
          if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
              !FormatTok->Previous && NextTok->is(tok::eof) &&
              // A statement can end with only `;` (simple statement), a block
              // closing brace (compound statement), or `:` (label statement).
              // If PrevTok is a block opening brace, Tok ends an empty block.
              !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
            ProbablyBracedList = true;
          }
        }
        // Both braces of the pair receive the same kind.
        const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
        Tok->setBlockKind(BlockKind);
        LBrace->setBlockKind(BlockKind);
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-unclassified brace marks it as a
      // block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }

    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  FormatTok = Tokens->setPosition(StoredPosition);
}

// Sets the token type of the directly previous right brace.
// Does nothing if the previous non-comment token is absent or not a '}'.
void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
  if (auto Prev = FormatTok->getPreviousNonComment();
      Prev && Prev->is(tok::r_brace)) {
    Prev->setFinalizedType(Type);
  }
}

/// Mixes the \c std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

/// \returns a hash over the Kind and Line of every entry of \c PPStack, in
/// stack order; 0 for an empty stack.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
// Always returns true when the column limit is 0.
bool UnwrappedLineParser::mightFitOnOneLine(
    UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
  const auto ColumnLimit = Style.ColumnLimit;
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());

  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  // Snapshot every token (and its children) before annotation.
  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    SavedToken.Children = std::move(Token.Children);
  }

  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  auto Length = LastToken->TotalLength;
  if (OpeningBrace) {
    assert(OpeningBrace != Tokens.front().Tok);
    // NOTE(review): a jump of exactly ColumnLimit in TotalLength at the brace
    // presumably comes from the annotator penalizing a forced break; confirm
    // against TokenAnnotator.
    if (auto Prev = OpeningBrace->Previous;
        Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
      Length -= ColumnLimit;
    }
    Length -= OpeningBrace->TokenText.size() + 1;
  }

  // A leading '}' is discounted as well (its text plus one).
  if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
    assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
    Length -= FirstToken->TokenText.size() + 1;
  }

  // Restore the tokens from the snapshot and free the clones.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  // If these change, PPLevel needs to be used to get the correct indentation.
  assert(!Line.InMacroBody);
  assert(!Line.InPPDirective);
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}

/// Parses a block starting at the current '{' (or macro block begin, or a
/// Verilog begin/hierarchy token) through its matching end.
///
/// \param MustBeDeclaration Declaration state installed for the block body.
/// \param AddLevels Number of levels added for the body of the block.
/// \param MunchSemi Whether a ';' directly after the block is consumed.
/// \param KeepBraces When set, braces are only considered removable if the
/// nested \c if left brace reported by parseLevel() is non-optional.
/// \param IfKind Forwarded to parseLevel().
/// \param UnindentWhitesmithsBraces Whether to decrement the level again after
/// the opening line has been added.
/// \returns the \c if left brace reported by parseLevel() (may be null), or
/// \c nullptr if the nesting level exceeds 300.
FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                             unsigned AddLevels, bool MunchSemi,
                                             bool KeepBraces,
                                             IfStmtKind *IfKind,
                                             bool UnindentWhitesmithsBraces) {
  auto HandleVerilogBlockLabel = [this]() {
    // ":" name
    if (Style.isVerilog() && FormatTok->is(tok::colon)) {
      nextToken();
      if (Keywords.isVerilogIdentifier(*FormatTok))
        nextToken();
    }
  };

  // Whether this is a Verilog-specific block that has a special header like a
  // module.
  const bool VerilogHierarchy =
      Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
          (Style.isVerilog() &&
           (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
         "'{' or macro block token expected");
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Index the line holding the opening token will get in CurrentLines, unless
  // the brace is wrapped (see RemoveBraces below).
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (!VerilogHierarchy && AddLevels > 0 &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
    ++Line->Level;
  }

  size_t PPStartHash = computePPHash();

  const unsigned InitialLevel = Line->Level;
  if (VerilogHierarchy) {
    AddLevels += parseVerilogHierarchyHeader();
  } else {
    nextToken(/*LevelDifference=*/AddLevels);
    HandleVerilogBlockLabel();
  }

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);

  if (eof())
    return IfLBrace;

  // The level ended on something other than the expected closing token.
  if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
                 : FormatTok->isNot(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  if (FormatTok->is(tok::r_brace)) {
    FormatTok->setBlockKind(BK_Block);
    if (Tok->is(TT_NamespaceLBrace))
      FormatTok->setFinalizedType(TT_NamespaceRBrace);
  }

  const bool IsFunctionRBrace =
      FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);

  // Decides whether the braces of this block may be removed. Captures by
  // copy and is mutable, so the changes to Index and Tok below only affect
  // the lambda's own copies.
  auto RemoveBraces = [=]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    auto &LastLine = CurrentLines->back();
    if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  // Removable braces are linked to each other through MatchingParen.
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // When this is a function block and there is an unnecessary semicolon
  // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  // it later).
  if (Style.RemoveSemicolon && IsFunctionRBrace) {
    while (FormatTok->is(tok::semi)) {
      FormatTok->Optional = true;
      nextToken();
    }
  }

  HandleVerilogBlockLabel();

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  Line->Level = InitialLevel;

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  // Opening and closing lines are only cross-linked when the preprocessor
  // stack hashes to the same value at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}

/// \returns true if \p Line has at least four tokens and starts with the
/// token sequence `goog . scope (`.
static bool isGoogScope(const UnwrappedLine &Line) {
  // FIXME: Closure-library specific stuff should not be hard-coded but be
  // configurable.
  if (Line.Tokens.size() < 4)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->TokenText != "goog")
    return false;
  ++I;
  if (I->Tok->isNot(tok::period))
    return false;
  ++I;
  if (I->Tok->TokenText != "scope")
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

/// \returns true if \p Line has at least three tokens and starts with the
/// token sequence `( function (`.
static bool isIIFE(const UnwrappedLine &Line,
                   const AdditionalKeywords &Keywords) {
  // Look for the start of an immediately invoked anonymous function.
  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  // This is commonly done in JavaScript to create a new, anonymous scope.
  // Example: (function() { ... })()
  if (Line.Tokens.size() < 3)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->isNot(tok::l_paren))
    return false;
  ++I;
  if (I->Tok->isNot(Keywords.kw_function))
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

/// \returns the \c Style.BraceWrapping flag that matches the kind of
/// \p InitialToken (namespace, class, union, struct or enum), or false for any
/// other kind. A \c TT_NamespaceMacro token is treated like \c namespace.
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                   const FormatToken &InitialToken) {
  tok::TokenKind Kind = InitialToken.Tok.getKind();
  if (InitialToken.is(TT_NamespaceMacro))
    Kind = tok::kw_namespace;

  switch (Kind) {
  case tok::kw_namespace:
    return Style.BraceWrapping.AfterNamespace;
  case tok::kw_class:
    return Style.BraceWrapping.AfterClass;
  case tok::kw_union:
    return Style.BraceWrapping.AfterUnion;
  case tok::kw_struct:
    return Style.BraceWrapping.AfterStruct;
  case tok::kw_enum:
    return Style.BraceWrapping.AfterEnum;
  default:
    return false;
  }
}

/// Parses a block starting at the current '{' with its lines collected under a
/// ScopedLineState rather than directly in the current line list.
///
/// The body is parsed one level deeper than the current line, except in
/// JavaScript when the current line is a `goog.scope(` call or the start of an
/// immediately invoked function expression.
void UnwrappedLineParser::parseChildBlock() {
  assert(FormatTok->is(tok::l_brace));
  FormatTok->setBlockKind(BK_Block);
  const FormatToken *OpeningBrace = FormatTok;
  nextToken();
  {
    // Must be computed before LineState replaces *Line with a fresh line.
    bool SkipIndent = (Style.isJavaScript() &&
                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
    ScopedLineState LineState(*this);
    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                            /*MustBeDeclaration=*/false);
    Line->Level += SkipIndent ? 0 : 1;
    parseLevel(OpeningBrace);
    flushComments(isOnNewLine(*FormatTok));
    Line->Level -= SkipIndent ? 0 : 1;
  }
  // Step over the token the nested level stopped at.
  nextToken();
}

/// Parses a preprocessor directive starting at the current '#' and dispatches
/// on the directive keyword. Tokens without identifier info and unrecognized
/// keywords go to parsePPUnknown().
void UnwrappedLineParser::parsePPDirective() {
  assert(FormatTok->is(tok::hash) && "'#' expected");
  ScopedMacroState MacroState(*Line, Tokens, FormatTok);

  nextToken();

  if (!FormatTok->Tok.getIdentifierInfo()) {
    parsePPUnknown();
    return;
  }

  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_define:
    parsePPDefine();
    return;
  case tok::pp_if:
    parsePPIf(/*IfDef=*/false);
    break;
  case tok::pp_ifdef:
  case tok::pp_ifndef:
    parsePPIf(/*IfDef=*/true);
    break;
  case tok::pp_else:
  case tok::pp_elifdef:
  case tok::pp_elifndef:
  case tok::pp_elif:
    parsePPElse();
    break;
  case tok::pp_endif:
    parsePPEndIf();
    break;
  case tok::pp_pragma:
    parsePPPragma();
    break;
  default:
    parsePPUnknown();
    break;
  }
}

/// Pushes a new entry onto \c PPStack for a conditional branch.
///
/// The entry records the current number of lines (\c CurrentLines->size(),
/// plus \c Lines.size() when preprocessor directive lines are being
/// collected). Its kind is \c PP_Unreachable if \p Unreachable is set or the
/// enclosing entry is already unreachable, and \c PP_Conditional otherwise.
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  size_t Line = CurrentLines->size();
  if (CurrentLines == &PreprocessorDirectives)
    Line += Lines.size();

  if (Unreachable ||
      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
    PPStack.push_back({PP_Unreachable, Line});
  } else {
    PPStack.push_back({PP_Conditional, Line});
  }
}

/// Enters a new conditional-compilation nesting level (\c #if and friends).
///
/// Grows the per-level branch bookkeeping when this depth is seen for the
/// first time, and starts a new chain index (-1 if \p Unreachable, else 0).
/// The first branch is treated as unreachable when a later branch index is
/// selected for this level.
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  ++PPBranchLevel;
  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
    PPLevelBranchIndex.push_back(0);
    PPLevelBranchCount.push_back(0);
  }
  PPChainBranchIndex.push(Unreachable ? -1 : 0);
  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  conditionalCompilationCondition(Unreachable || Skip);
}

/// Switches to the next branch of the current conditional chain.
///
/// Replaces the top \c PPStack entry and increments the chain's branch index.
/// The new branch is unreachable when its index differs from the branch index
/// selected for the current level.
void UnwrappedLineParser::conditionalCompilationAlternative() {
  if (!PPStack.empty())
    PPStack.pop_back();
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (!PPChainBranchIndex.empty())
    ++PPChainBranchIndex.top();
  conditionalCompilationCondition(
      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
}

/// Leaves the current conditional-compilation level (\c #endif).
///
/// Raises the recorded branch count for the level to the number of branches
/// in this chain if that is larger, then pops the level, the chain index and
/// the \c PPStack entry. Each pop is guarded, so an unmatched \c #endif is
/// tolerated.
void UnwrappedLineParser::conditionalCompilationEnd() {
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  }
  // Guard against #endif's without #if.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  if (!PPChainBranchIndex.empty())
    PPChainBranchIndex.pop();
  if (!PPStack.empty())
    PPStack.pop_back();
}

void UnwrappedLineParser::parsePPIf(bool IfDef) {
  bool IfNDef = FormatTok->is(tok::pp_ifndef);
  nextToken();
  bool Unreachable = false;
  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
    Unreachable = true;
  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
    Unreachable = true;
  conditionalCompilationStart(Unreachable);
  FormatToken *IfCondition = FormatTok;
  // If there's a #ifndef on the first line, and the only lines before it are
  // comments, it could be an include guard.
1103 bool MaybeIncludeGuard = IfNDef; 1104 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1105 for (auto &Line : Lines) { 1106 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1107 MaybeIncludeGuard = false; 1108 IncludeGuard = IG_Rejected; 1109 break; 1110 } 1111 } 1112 } 1113 --PPBranchLevel; 1114 parsePPUnknown(); 1115 ++PPBranchLevel; 1116 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1117 IncludeGuard = IG_IfNdefed; 1118 IncludeGuardToken = IfCondition; 1119 } 1120 } 1121 1122 void UnwrappedLineParser::parsePPElse() { 1123 // If a potential include guard has an #else, it's not an include guard. 1124 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1125 IncludeGuard = IG_Rejected; 1126 // Don't crash when there is an #else without an #if. 1127 assert(PPBranchLevel >= -1); 1128 if (PPBranchLevel == -1) 1129 conditionalCompilationStart(/*Unreachable=*/true); 1130 conditionalCompilationAlternative(); 1131 --PPBranchLevel; 1132 parsePPUnknown(); 1133 ++PPBranchLevel; 1134 } 1135 1136 void UnwrappedLineParser::parsePPEndIf() { 1137 conditionalCompilationEnd(); 1138 parsePPUnknown(); 1139 // If the #endif of a potential include guard is the last thing in the file, 1140 // then we found an include guard. 
1141 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1142 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1143 IncludeGuard = IG_Found; 1144 } 1145 } 1146 1147 void UnwrappedLineParser::parsePPDefine() { 1148 nextToken(); 1149 1150 if (!FormatTok->Tok.getIdentifierInfo()) { 1151 IncludeGuard = IG_Rejected; 1152 IncludeGuardToken = nullptr; 1153 parsePPUnknown(); 1154 return; 1155 } 1156 1157 if (IncludeGuard == IG_IfNdefed && 1158 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1159 IncludeGuard = IG_Defined; 1160 IncludeGuardToken = nullptr; 1161 for (auto &Line : Lines) { 1162 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1163 IncludeGuard = IG_Rejected; 1164 break; 1165 } 1166 } 1167 } 1168 1169 // In the context of a define, even keywords should be treated as normal 1170 // identifiers. Setting the kind to identifier is not enough, because we need 1171 // to treat additional keywords like __except as well, which are already 1172 // identifiers. Setting the identifier info to null interferes with include 1173 // guard processing above, and changes preprocessing nesting. 1174 FormatTok->Tok.setKind(tok::identifier); 1175 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1176 nextToken(); 1177 if (FormatTok->Tok.getKind() == tok::l_paren && 1178 !FormatTok->hasWhitespaceBefore()) { 1179 parseParens(); 1180 } 1181 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1182 Line->Level += PPBranchLevel + 1; 1183 addUnwrappedLine(); 1184 ++Line->Level; 1185 1186 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 
0 : 1); 1187 assert((int)Line->PPLevel >= 0); 1188 Line->InMacroBody = true; 1189 1190 if (Style.SkipMacroDefinitionBody) { 1191 while (!eof()) { 1192 FormatTok->Finalized = true; 1193 FormatTok = Tokens->getNextToken(); 1194 } 1195 addUnwrappedLine(); 1196 return; 1197 } 1198 1199 // Errors during a preprocessor directive can only affect the layout of the 1200 // preprocessor directive, and thus we ignore them. An alternative approach 1201 // would be to use the same approach we use on the file level (no 1202 // re-indentation if there was a structural error) within the macro 1203 // definition. 1204 parseFile(); 1205 } 1206 1207 void UnwrappedLineParser::parsePPPragma() { 1208 Line->InPragmaDirective = true; 1209 parsePPUnknown(); 1210 } 1211 1212 void UnwrappedLineParser::parsePPUnknown() { 1213 do { 1214 nextToken(); 1215 } while (!eof()); 1216 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1217 Line->Level += PPBranchLevel + 1; 1218 addUnwrappedLine(); 1219 } 1220 1221 // Here we exclude certain tokens that are not usually the first token in an 1222 // unwrapped line. This is used in attempt to distinguish macro calls without 1223 // trailing semicolons from other constructs split to several lines. 1224 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1225 // Semicolon can be a null-statement, l_square can be a start of a macro or 1226 // a C++11 attribute, but this doesn't seem to be common. 1227 return !Tok.isOneOf(tok::semi, tok::l_brace, 1228 // Tokens that can only be used as binary operators and a 1229 // part of overloaded operator names. 
1230 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1231 tok::less, tok::greater, tok::slash, tok::percent, 1232 tok::lessless, tok::greatergreater, tok::equal, 1233 tok::plusequal, tok::minusequal, tok::starequal, 1234 tok::slashequal, tok::percentequal, tok::ampequal, 1235 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1236 tok::lesslessequal, 1237 // Colon is used in labels, base class lists, initializer 1238 // lists, range-based for loops, ternary operator, but 1239 // should never be the first token in an unwrapped line. 1240 tok::colon, 1241 // 'noexcept' is a trailing annotation. 1242 tok::kw_noexcept); 1243 } 1244 1245 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1246 const FormatToken *FormatTok) { 1247 // FIXME: This returns true for C/C++ keywords like 'struct'. 1248 return FormatTok->is(tok::identifier) && 1249 (!FormatTok->Tok.getIdentifierInfo() || 1250 !FormatTok->isOneOf( 1251 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1252 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1253 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1254 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1255 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1256 Keywords.kw_instanceof, Keywords.kw_interface, 1257 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1258 } 1259 1260 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1261 const FormatToken *FormatTok) { 1262 return FormatTok->Tok.isLiteral() || 1263 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1264 mustBeJSIdent(Keywords, FormatTok); 1265 } 1266 1267 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1268 // when encountered after a value (see mustBeJSIdentOrValue). 
1269 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1270 const FormatToken *FormatTok) { 1271 return FormatTok->isOneOf( 1272 tok::kw_return, Keywords.kw_yield, 1273 // conditionals 1274 tok::kw_if, tok::kw_else, 1275 // loops 1276 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1277 // switch/case 1278 tok::kw_switch, tok::kw_case, 1279 // exceptions 1280 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1281 // declaration 1282 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1283 Keywords.kw_async, Keywords.kw_function, 1284 // import/export 1285 Keywords.kw_import, tok::kw_export); 1286 } 1287 1288 // Checks whether a token is a type in K&R C (aka C78). 1289 static bool isC78Type(const FormatToken &Tok) { 1290 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1291 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1292 tok::identifier); 1293 } 1294 1295 // This function checks whether a token starts the first parameter declaration 1296 // in a K&R C (aka C78) function definition, e.g.: 1297 // int f(a, b) 1298 // short a, b; 1299 // { 1300 // return a + b; 1301 // } 1302 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1303 const FormatToken *FuncName) { 1304 assert(Tok); 1305 assert(Next); 1306 assert(FuncName); 1307 1308 if (FuncName->isNot(tok::identifier)) 1309 return false; 1310 1311 const FormatToken *Prev = FuncName->Previous; 1312 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1313 return false; 1314 1315 if (!isC78Type(*Tok) && 1316 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1317 return false; 1318 } 1319 1320 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1321 return false; 1322 1323 Tok = Tok->Previous; 1324 if (!Tok || Tok->isNot(tok::r_paren)) 1325 return false; 1326 1327 Tok = Tok->Previous; 1328 if (!Tok || Tok->isNot(tok::identifier)) 1329 return false; 1330 1331 return 
Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1332 } 1333 1334 bool UnwrappedLineParser::parseModuleImport() { 1335 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1336 1337 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1338 !Token->Tok.getIdentifierInfo() && 1339 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1340 return false; 1341 } 1342 1343 nextToken(); 1344 while (!eof()) { 1345 if (FormatTok->is(tok::colon)) { 1346 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1347 } 1348 // Handle import <foo/bar.h> as we would an include statement. 1349 else if (FormatTok->is(tok::less)) { 1350 nextToken(); 1351 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1352 // Mark tokens up to the trailing line comments as implicit string 1353 // literals. 1354 if (FormatTok->isNot(tok::comment) && 1355 !FormatTok->TokenText.starts_with("//")) { 1356 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1357 } 1358 nextToken(); 1359 } 1360 } 1361 if (FormatTok->is(tok::semi)) { 1362 nextToken(); 1363 break; 1364 } 1365 nextToken(); 1366 } 1367 1368 addUnwrappedLine(); 1369 return true; 1370 } 1371 1372 // readTokenWithJavaScriptASI reads the next token and terminates the current 1373 // line if JavaScript Automatic Semicolon Insertion must 1374 // happen between the current token and the next token. 1375 // 1376 // This method is conservative - it cannot cover all edge cases of JavaScript, 1377 // but only aims to correctly handle certain well known cases. It *must not* 1378 // return true in speculative cases. 1379 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1380 FormatToken *Previous = FormatTok; 1381 readToken(); 1382 FormatToken *Next = FormatTok; 1383 1384 bool IsOnSameLine = 1385 CommentsBeforeNextToken.empty() 1386 ? 
Next->NewlinesBefore == 0 1387 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1388 if (IsOnSameLine) 1389 return; 1390 1391 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1392 bool PreviousStartsTemplateExpr = 1393 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${"); 1394 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1395 // If the line contains an '@' sign, the previous token might be an 1396 // annotation, which can precede another identifier/value. 1397 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1398 return LineNode.Tok->is(tok::at); 1399 }); 1400 if (HasAt) 1401 return; 1402 } 1403 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1404 return addUnwrappedLine(); 1405 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1406 bool NextEndsTemplateExpr = 1407 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}"); 1408 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1409 (PreviousMustBeValue || 1410 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1411 tok::minusminus))) { 1412 return addUnwrappedLine(); 1413 } 1414 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1415 isJSDeclOrStmt(Keywords, Next)) { 1416 return addUnwrappedLine(); 1417 } 1418 } 1419 1420 void UnwrappedLineParser::parseStructuralElement( 1421 const FormatToken *OpeningBrace, IfStmtKind *IfKind, 1422 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { 1423 if (Style.Language == FormatStyle::LK_TableGen && 1424 FormatTok->is(tok::pp_include)) { 1425 nextToken(); 1426 if (FormatTok->is(tok::string_literal)) 1427 nextToken(); 1428 addUnwrappedLine(); 1429 return; 1430 } 1431 1432 if (IsCpp) { 1433 while (FormatTok->is(tok::l_square) && handleCppAttributes()) { 1434 } 1435 } else if (Style.isVerilog()) { 1436 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { 1437 parseForOrWhileLoop(/*HasParens=*/false); 1438 return; 1439 
} 1440 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) { 1441 parseForOrWhileLoop(); 1442 return; 1443 } 1444 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 1445 Keywords.kw_assume, Keywords.kw_cover)) { 1446 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); 1447 return; 1448 } 1449 1450 // Skip things that can exist before keywords like 'if' and 'case'. 1451 while (true) { 1452 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, 1453 Keywords.kw_unique0)) { 1454 nextToken(); 1455 } else if (FormatTok->is(tok::l_paren) && 1456 Tokens->peekNextToken()->is(tok::star)) { 1457 parseParens(); 1458 } else { 1459 break; 1460 } 1461 } 1462 } 1463 1464 // Tokens that only make sense at the beginning of a line. 1465 if (FormatTok->isAccessSpecifierKeyword()) { 1466 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1467 Style.isCSharp()) { 1468 nextToken(); 1469 } else { 1470 parseAccessSpecifier(); 1471 } 1472 return; 1473 } 1474 switch (FormatTok->Tok.getKind()) { 1475 case tok::kw_asm: 1476 nextToken(); 1477 if (FormatTok->is(tok::l_brace)) { 1478 FormatTok->setFinalizedType(TT_InlineASMBrace); 1479 nextToken(); 1480 while (FormatTok && !eof()) { 1481 if (FormatTok->is(tok::r_brace)) { 1482 FormatTok->setFinalizedType(TT_InlineASMBrace); 1483 nextToken(); 1484 addUnwrappedLine(); 1485 break; 1486 } 1487 FormatTok->Finalized = true; 1488 nextToken(); 1489 } 1490 } 1491 break; 1492 case tok::kw_namespace: 1493 parseNamespace(); 1494 return; 1495 case tok::kw_if: { 1496 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1497 // field/method declaration. 1498 break; 1499 } 1500 FormatToken *Tok = parseIfThenElse(IfKind); 1501 if (IfLeftBrace) 1502 *IfLeftBrace = Tok; 1503 return; 1504 } 1505 case tok::kw_for: 1506 case tok::kw_while: 1507 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1508 // field/method declaration. 
1509 break; 1510 } 1511 parseForOrWhileLoop(); 1512 return; 1513 case tok::kw_do: 1514 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1515 // field/method declaration. 1516 break; 1517 } 1518 parseDoWhile(); 1519 if (HasDoWhile) 1520 *HasDoWhile = true; 1521 return; 1522 case tok::kw_switch: 1523 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1524 // 'switch: string' field declaration. 1525 break; 1526 } 1527 parseSwitch(/*IsExpr=*/false); 1528 return; 1529 case tok::kw_default: { 1530 // In Verilog default along with other labels are handled in the next loop. 1531 if (Style.isVerilog()) 1532 break; 1533 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1534 // 'default: string' field declaration. 1535 break; 1536 } 1537 auto *Default = FormatTok; 1538 nextToken(); 1539 if (FormatTok->is(tok::colon)) { 1540 FormatTok->setFinalizedType(TT_CaseLabelColon); 1541 parseLabel(); 1542 return; 1543 } 1544 if (FormatTok->is(tok::arrow)) { 1545 FormatTok->setFinalizedType(TT_CaseLabelArrow); 1546 Default->setFinalizedType(TT_SwitchExpressionLabel); 1547 parseLabel(); 1548 return; 1549 } 1550 // e.g. "default void f() {}" in a Java interface. 1551 break; 1552 } 1553 case tok::kw_case: 1554 // Proto: there are no switch/case statements. 1555 if (Style.Language == FormatStyle::LK_Proto) { 1556 nextToken(); 1557 return; 1558 } 1559 if (Style.isVerilog()) { 1560 parseBlock(); 1561 addUnwrappedLine(); 1562 return; 1563 } 1564 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1565 // 'case: string' field declaration. 1566 nextToken(); 1567 break; 1568 } 1569 parseCaseLabel(); 1570 return; 1571 case tok::kw_try: 1572 case tok::kw___try: 1573 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1574 // field/method declaration. 1575 break; 1576 } 1577 parseTryCatch(); 1578 return; 1579 case tok::kw_extern: 1580 nextToken(); 1581 if (Style.isVerilog()) { 1582 // In Verilog and extern module declaration looks like a start of module. 
1583 // But there is no body and endmodule. So we handle it separately. 1584 if (Keywords.isVerilogHierarchy(*FormatTok)) { 1585 parseVerilogHierarchyHeader(); 1586 return; 1587 } 1588 } else if (FormatTok->is(tok::string_literal)) { 1589 nextToken(); 1590 if (FormatTok->is(tok::l_brace)) { 1591 if (Style.BraceWrapping.AfterExternBlock) 1592 addUnwrappedLine(); 1593 // Either we indent or for backwards compatibility we follow the 1594 // AfterExternBlock style. 1595 unsigned AddLevels = 1596 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1597 (Style.BraceWrapping.AfterExternBlock && 1598 Style.IndentExternBlock == 1599 FormatStyle::IEBS_AfterExternBlock) 1600 ? 1u 1601 : 0u; 1602 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1603 addUnwrappedLine(); 1604 return; 1605 } 1606 } 1607 break; 1608 case tok::kw_export: 1609 if (Style.isJavaScript()) { 1610 parseJavaScriptEs6ImportExport(); 1611 return; 1612 } 1613 if (IsCpp) { 1614 nextToken(); 1615 if (FormatTok->is(tok::kw_namespace)) { 1616 parseNamespace(); 1617 return; 1618 } 1619 if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) 1620 return; 1621 } 1622 break; 1623 case tok::kw_inline: 1624 nextToken(); 1625 if (FormatTok->is(tok::kw_namespace)) { 1626 parseNamespace(); 1627 return; 1628 } 1629 break; 1630 case tok::identifier: 1631 if (FormatTok->is(TT_ForEachMacro)) { 1632 parseForOrWhileLoop(); 1633 return; 1634 } 1635 if (FormatTok->is(TT_MacroBlockBegin)) { 1636 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1637 /*MunchSemi=*/false); 1638 return; 1639 } 1640 if (FormatTok->is(Keywords.kw_import)) { 1641 if (Style.isJavaScript()) { 1642 parseJavaScriptEs6ImportExport(); 1643 return; 1644 } 1645 if (Style.Language == FormatStyle::LK_Proto) { 1646 nextToken(); 1647 if (FormatTok->is(tok::kw_public)) 1648 nextToken(); 1649 if (FormatTok->isNot(tok::string_literal)) 1650 return; 1651 nextToken(); 1652 if (FormatTok->is(tok::semi)) 1653 nextToken(); 1654 addUnwrappedLine(); 
1655 return; 1656 } 1657 if (IsCpp && parseModuleImport()) 1658 return; 1659 } 1660 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1661 Keywords.kw_slots, Keywords.kw_qslots)) { 1662 nextToken(); 1663 if (FormatTok->is(tok::colon)) { 1664 nextToken(); 1665 addUnwrappedLine(); 1666 return; 1667 } 1668 } 1669 if (IsCpp && FormatTok->is(TT_StatementMacro)) { 1670 parseStatementMacro(); 1671 return; 1672 } 1673 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) { 1674 parseNamespace(); 1675 return; 1676 } 1677 // In Verilog labels can be any expression, so we don't do them here. 1678 // JS doesn't have macros, and within classes colons indicate fields, not 1679 // labels. 1680 // TableGen doesn't have labels. 1681 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() && 1682 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) { 1683 nextToken(); 1684 if (!Line->InMacroBody || CurrentLines->size() > 1) 1685 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1686 FormatTok->setFinalizedType(TT_GotoLabelColon); 1687 parseLabel(!Style.IndentGotoLabels); 1688 if (HasLabel) 1689 *HasLabel = true; 1690 return; 1691 } 1692 // In all other cases, parse the declaration. 
1693 break; 1694 default: 1695 break; 1696 } 1697 1698 for (const bool InRequiresExpression = 1699 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 1700 !eof();) { 1701 if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) { 1702 if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true); 1703 Next && Next->isBinaryOperator()) { 1704 FormatTok->Tok.setKind(tok::identifier); 1705 } 1706 } 1707 const FormatToken *Previous = FormatTok->Previous; 1708 switch (FormatTok->Tok.getKind()) { 1709 case tok::at: 1710 nextToken(); 1711 if (FormatTok->is(tok::l_brace)) { 1712 nextToken(); 1713 parseBracedList(); 1714 break; 1715 } else if (Style.Language == FormatStyle::LK_Java && 1716 FormatTok->is(Keywords.kw_interface)) { 1717 nextToken(); 1718 break; 1719 } 1720 switch (FormatTok->Tok.getObjCKeywordID()) { 1721 case tok::objc_public: 1722 case tok::objc_protected: 1723 case tok::objc_package: 1724 case tok::objc_private: 1725 return parseAccessSpecifier(); 1726 case tok::objc_interface: 1727 case tok::objc_implementation: 1728 return parseObjCInterfaceOrImplementation(); 1729 case tok::objc_protocol: 1730 if (parseObjCProtocol()) 1731 return; 1732 break; 1733 case tok::objc_end: 1734 return; // Handled by the caller. 
1735 case tok::objc_optional: 1736 case tok::objc_required: 1737 nextToken(); 1738 addUnwrappedLine(); 1739 return; 1740 case tok::objc_autoreleasepool: 1741 nextToken(); 1742 if (FormatTok->is(tok::l_brace)) { 1743 if (Style.BraceWrapping.AfterControlStatement == 1744 FormatStyle::BWACS_Always) { 1745 addUnwrappedLine(); 1746 } 1747 parseBlock(); 1748 } 1749 addUnwrappedLine(); 1750 return; 1751 case tok::objc_synchronized: 1752 nextToken(); 1753 if (FormatTok->is(tok::l_paren)) { 1754 // Skip synchronization object 1755 parseParens(); 1756 } 1757 if (FormatTok->is(tok::l_brace)) { 1758 if (Style.BraceWrapping.AfterControlStatement == 1759 FormatStyle::BWACS_Always) { 1760 addUnwrappedLine(); 1761 } 1762 parseBlock(); 1763 } 1764 addUnwrappedLine(); 1765 return; 1766 case tok::objc_try: 1767 // This branch isn't strictly necessary (the kw_try case below would 1768 // do this too after the tok::at is parsed above). But be explicit. 1769 parseTryCatch(); 1770 return; 1771 default: 1772 break; 1773 } 1774 break; 1775 case tok::kw_requires: { 1776 if (IsCpp) { 1777 bool ParsedClause = parseRequires(); 1778 if (ParsedClause) 1779 return; 1780 } else { 1781 nextToken(); 1782 } 1783 break; 1784 } 1785 case tok::kw_enum: 1786 // Ignore if this is part of "template <enum ..." or "... -> enum" or 1787 // "template <..., enum ...>". 1788 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) { 1789 nextToken(); 1790 break; 1791 } 1792 1793 // parseEnum falls through and does not yet add an unwrapped line as an 1794 // enum definition can start a structural element. 1795 if (!parseEnum()) 1796 break; 1797 // This only applies to C++ and Verilog. 
1798 if (!IsCpp && !Style.isVerilog()) { 1799 addUnwrappedLine(); 1800 return; 1801 } 1802 break; 1803 case tok::kw_typedef: 1804 nextToken(); 1805 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1806 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1807 Keywords.kw_CF_CLOSED_ENUM, 1808 Keywords.kw_NS_CLOSED_ENUM)) { 1809 parseEnum(); 1810 } 1811 break; 1812 case tok::kw_class: 1813 if (Style.isVerilog()) { 1814 parseBlock(); 1815 addUnwrappedLine(); 1816 return; 1817 } 1818 if (Style.isTableGen()) { 1819 // Do nothing special. In this case the l_brace becomes FunctionLBrace. 1820 // This is same as def and so on. 1821 nextToken(); 1822 break; 1823 } 1824 [[fallthrough]]; 1825 case tok::kw_struct: 1826 case tok::kw_union: 1827 if (parseStructLike()) 1828 return; 1829 break; 1830 case tok::kw_decltype: 1831 nextToken(); 1832 if (FormatTok->is(tok::l_paren)) { 1833 parseParens(); 1834 assert(FormatTok->Previous); 1835 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, 1836 tok::l_paren)) { 1837 Line->SeenDecltypeAuto = true; 1838 } 1839 } 1840 break; 1841 case tok::period: 1842 nextToken(); 1843 // In Java, classes have an implicit static member "class". 1844 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1845 FormatTok->is(tok::kw_class)) { 1846 nextToken(); 1847 } 1848 if (Style.isJavaScript() && FormatTok && 1849 FormatTok->Tok.getIdentifierInfo()) { 1850 // JavaScript only has pseudo keywords, all keywords are allowed to 1851 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1852 nextToken(); 1853 } 1854 break; 1855 case tok::semi: 1856 nextToken(); 1857 addUnwrappedLine(); 1858 return; 1859 case tok::r_brace: 1860 addUnwrappedLine(); 1861 return; 1862 case tok::l_paren: { 1863 parseParens(); 1864 // Break the unwrapped line if a K&R C function definition has a parameter 1865 // declaration. 
1866 if (OpeningBrace || !IsCpp || !Previous || eof()) 1867 break; 1868 if (isC78ParameterDecl(FormatTok, 1869 Tokens->peekNextToken(/*SkipComment=*/true), 1870 Previous)) { 1871 addUnwrappedLine(); 1872 return; 1873 } 1874 break; 1875 } 1876 case tok::kw_operator: 1877 nextToken(); 1878 if (FormatTok->isBinaryOperator()) 1879 nextToken(); 1880 break; 1881 case tok::caret: 1882 nextToken(); 1883 // Block return type. 1884 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) { 1885 nextToken(); 1886 // Return types: pointers are ok too. 1887 while (FormatTok->is(tok::star)) 1888 nextToken(); 1889 } 1890 // Block argument list. 1891 if (FormatTok->is(tok::l_paren)) 1892 parseParens(); 1893 // Block body. 1894 if (FormatTok->is(tok::l_brace)) 1895 parseChildBlock(); 1896 break; 1897 case tok::l_brace: 1898 if (InRequiresExpression) 1899 FormatTok->setFinalizedType(TT_BracedListLBrace); 1900 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1901 IsDecltypeAutoFunction = Line->SeenDecltypeAuto; 1902 // A block outside of parentheses must be the last part of a 1903 // structural element. 1904 // FIXME: Figure out cases where this is not true, and add projections 1905 // for them (the one we know is missing are lambdas). 1906 if (Style.Language == FormatStyle::LK_Java && 1907 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1908 // If necessary, we could set the type to something different than 1909 // TT_FunctionLBrace. 1910 if (Style.BraceWrapping.AfterControlStatement == 1911 FormatStyle::BWACS_Always) { 1912 addUnwrappedLine(); 1913 } 1914 } else if (Style.BraceWrapping.AfterFunction) { 1915 addUnwrappedLine(); 1916 } 1917 if (!Previous || Previous->isNot(TT_TypeDeclarationParen)) 1918 FormatTok->setFinalizedType(TT_FunctionLBrace); 1919 parseBlock(); 1920 IsDecltypeAutoFunction = false; 1921 addUnwrappedLine(); 1922 return; 1923 } 1924 // Otherwise this was a braced init list, and the structural 1925 // element continues. 
1926 break; 1927 case tok::kw_try: 1928 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1929 // field/method declaration. 1930 nextToken(); 1931 break; 1932 } 1933 // We arrive here when parsing function-try blocks. 1934 if (Style.BraceWrapping.AfterFunction) 1935 addUnwrappedLine(); 1936 parseTryCatch(); 1937 return; 1938 case tok::identifier: { 1939 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1940 Line->MustBeDeclaration) { 1941 addUnwrappedLine(); 1942 parseCSharpGenericTypeConstraint(); 1943 break; 1944 } 1945 if (FormatTok->is(TT_MacroBlockEnd)) { 1946 addUnwrappedLine(); 1947 return; 1948 } 1949 1950 // Function declarations (as opposed to function expressions) are parsed 1951 // on their own unwrapped line by continuing this loop. Function 1952 // expressions (functions that are not on their own line) must not create 1953 // a new unwrapped line, so they are special cased below. 1954 size_t TokenCount = Line->Tokens.size(); 1955 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1956 (TokenCount > 1 || 1957 (TokenCount == 1 && 1958 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) { 1959 tryToParseJSFunction(); 1960 break; 1961 } 1962 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1963 FormatTok->is(Keywords.kw_interface)) { 1964 if (Style.isJavaScript()) { 1965 // In JavaScript/TypeScript, "interface" can be used as a standalone 1966 // identifier, e.g. in `var interface = 1;`. If "interface" is 1967 // followed by another identifier, it is very like to be an actual 1968 // interface declaration. 
1969 unsigned StoredPosition = Tokens->getPosition(); 1970 FormatToken *Next = Tokens->getNextToken(); 1971 FormatTok = Tokens->setPosition(StoredPosition); 1972 if (!mustBeJSIdent(Keywords, Next)) { 1973 nextToken(); 1974 break; 1975 } 1976 } 1977 parseRecord(); 1978 addUnwrappedLine(); 1979 return; 1980 } 1981 1982 if (Style.isVerilog()) { 1983 if (FormatTok->is(Keywords.kw_table)) { 1984 parseVerilogTable(); 1985 return; 1986 } 1987 if (Keywords.isVerilogBegin(*FormatTok) || 1988 Keywords.isVerilogHierarchy(*FormatTok)) { 1989 parseBlock(); 1990 addUnwrappedLine(); 1991 return; 1992 } 1993 } 1994 1995 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) { 1996 if (parseStructLike()) 1997 return; 1998 break; 1999 } 2000 2001 if (IsCpp && FormatTok->is(TT_StatementMacro)) { 2002 parseStatementMacro(); 2003 return; 2004 } 2005 2006 // See if the following token should start a new unwrapped line. 2007 StringRef Text = FormatTok->TokenText; 2008 2009 FormatToken *PreviousToken = FormatTok; 2010 nextToken(); 2011 2012 // JS doesn't have macros, and within classes colons indicate fields, not 2013 // labels. 2014 if (Style.isJavaScript()) 2015 break; 2016 2017 auto OneTokenSoFar = [&]() { 2018 auto I = Line->Tokens.begin(), E = Line->Tokens.end(); 2019 while (I != E && I->Tok->is(tok::comment)) 2020 ++I; 2021 if (Style.isVerilog()) 2022 while (I != E && I->Tok->is(tok::hash)) 2023 ++I; 2024 return I != E && (++I == E); 2025 }; 2026 if (OneTokenSoFar()) { 2027 // Recognize function-like macro usages without trailing semicolon as 2028 // well as free-standing macros like Q_OBJECT. 2029 bool FunctionLike = FormatTok->is(tok::l_paren); 2030 if (FunctionLike) 2031 parseParens(); 2032 2033 bool FollowedByNewline = 2034 CommentsBeforeNextToken.empty() 2035 ? 
FormatTok->NewlinesBefore > 0 2036 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 2037 2038 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 2039 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 2040 if (PreviousToken->isNot(TT_UntouchableMacroFunc)) 2041 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); 2042 addUnwrappedLine(); 2043 return; 2044 } 2045 } 2046 break; 2047 } 2048 case tok::equal: 2049 if ((Style.isJavaScript() || Style.isCSharp()) && 2050 FormatTok->is(TT_FatArrow)) { 2051 tryToParseChildBlock(); 2052 break; 2053 } 2054 2055 nextToken(); 2056 if (FormatTok->is(tok::l_brace)) { 2057 // Block kind should probably be set to BK_BracedInit for any language. 2058 // C# needs this change to ensure that array initialisers and object 2059 // initialisers are indented the same way. 2060 if (Style.isCSharp()) 2061 FormatTok->setBlockKind(BK_BracedInit); 2062 // TableGen's defset statement has syntax of the form, 2063 // `defset <type> <name> = { <statement>... }` 2064 if (Style.isTableGen() && 2065 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) { 2066 FormatTok->setFinalizedType(TT_FunctionLBrace); 2067 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2068 /*MunchSemi=*/false); 2069 addUnwrappedLine(); 2070 break; 2071 } 2072 nextToken(); 2073 parseBracedList(); 2074 } else if (Style.Language == FormatStyle::LK_Proto && 2075 FormatTok->is(tok::less)) { 2076 nextToken(); 2077 parseBracedList(/*IsAngleBracket=*/true); 2078 } 2079 break; 2080 case tok::l_square: 2081 parseSquare(); 2082 break; 2083 case tok::kw_new: 2084 parseNew(); 2085 break; 2086 case tok::kw_switch: 2087 if (Style.Language == FormatStyle::LK_Java) 2088 parseSwitch(/*IsExpr=*/true); 2089 nextToken(); 2090 break; 2091 case tok::kw_case: 2092 // Proto: there are no switch/case statements. 2093 if (Style.Language == FormatStyle::LK_Proto) { 2094 nextToken(); 2095 return; 2096 } 2097 // In Verilog switch is called case. 
if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    default:
      nextToken();
      break;
    }
  }
}

/// Tries to parse a C# property accessor block starting at the current `{`.
///
/// Looks ahead from the brace without consuming tokens, skipping access
/// specifiers, `internal`, `;`, `get`, `set` and `init`. If none of `get`,
/// `set` or `init` was seen, the token position is restored and nothing is
/// parsed.
///
/// \returns false if the style is not C#, the token before the brace is not an
/// identifier, or no accessor keyword was found; true once the accessor block
/// has been consumed.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  while (!eof()) {
    if (Tok->isAccessSpecifierKeyword() ||
        Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // The first token outside the accepted set ends the lookahead; anything
    // other than the closing brace means the accessor has a body.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor block. A following `= ...;` is consumed as part of
      // the same line.
      nextToken();
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor with a block body: parse it one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      if (FormatTok->is(TT_FatArrow)) {
        // Expression-bodied accessor: consume through the terminating `;`.
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

/// Tries to parse a C++ lambda starting at the current `[`.
///
/// After the introducer has been parsed, tokens are consumed until the `{` of
/// the lambda body is reached. Encountering a token that is not accepted in
/// that position stops the scan and returns true without marking anything as
/// a lambda.
///
/// \returns false if the language is not C++ (the `[` is consumed anyway) or
/// if tryToParseLambdaIntroducer() rejected the introducer; true otherwise.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!IsCpp) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::less:
      // A `<` directly after the introducer's `]` opens an explicit template
      // parameter list.
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::r_square))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::kw_auto:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_template:
    case tok::kw_typename:
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::kw_constexpr:
    case tok::kw_consteval:
    case tok::comma:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_static:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    case tok::kw_requires: {
      auto *RequiresToken = FormatTok;
      nextToken();
      parseRequiresClause(RequiresToken);
      break;
    }
    case tok::equal:
      // `=` is only accepted inside the template parameter list.
      if (!InTemplateParameterList)
        return true;
      nextToken();
      break;
    default:
      return true;
    }
  }

  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);

  // Keep SeenDecltypeAuto on the stack while the body is parsed;
  // parseParens() consults NestedLambdas.back() for return-statement
  // parentheses.
  NestedLambdas.push_back(Line->SeenDecltypeAuto);
  parseChildBlock();
  assert(!NestedLambdas.empty());
  NestedLambdas.pop_back();

  return true;
}

/// Consumes the current `[` and decides whether it can start a lambda
/// introducer.
///
/// The bracket is rejected when the previous token has identifier info (other
/// than `return`, `co_await`, `co_yield` or `co_return`) or closes a scope,
/// when the bracket begins a structured binding, when the token after `[` is
/// another `[` or a literal, or when `[]` is followed by `>`.
///
/// \returns true after the rest of the introducer has been parsed via
/// parseSquare(); false otherwise (the `[` has been consumed in either case).
bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
  const FormatToken *Previous = FormatTok->Previous;
  const FormatToken *LeftSquare = FormatTok;
  nextToken();
  if ((Previous && ((Previous->Tok.getIdentifierInfo() &&
                     !Previous->isOneOf(tok::kw_return, tok::kw_co_await,
                                        tok::kw_co_yield, tok::kw_co_return)) ||
                    Previous->closesScope())) ||
      LeftSquare->isCppStructuredBinding(IsCpp)) {
    return false;
  }
  if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind()))
    return false;
  if (FormatTok->is(tok::r_square)) {
    const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true);
    if (Next->is(tok::greater))
      return false;
  }
  parseSquare(/*LambdaIntroducer=*/true);
  return true;
}

/// Parses a JavaScript `function`: the optional `*` of a generator, an
/// optional name, the parameter list, an optional `:` type annotation and
/// finally the body, which is parsed as a child block. Returns early if no `(`
/// follows the name or if the declaration ends with `;`.
void UnwrappedLineParser::tryToParseJSFunction() {
  assert(FormatTok->is(Keywords.kw_function));
  // NOTE(review): the assert above requires `function`, so this `async` check
  // looks unreachable in assert-enabled builds - confirm whether callers may
  // still arrive here on `async`.
  if (FormatTok->is(Keywords.kw_async))
    nextToken();
  // Consume "function".
  nextToken();

  // Consume * (generator function). Treat it like C++'s overloaded operators.
  if (FormatTok->is(tok::star)) {
    FormatTok->setFinalizedType(TT_OverloadedOperator);
    nextToken();
  }

  // Consume function name.
  if (FormatTok->is(tok::identifier))
    nextToken();

  if (FormatTok->isNot(tok::l_paren))
    return;

  // Parse formal parameter list.
  parseParens();

  if (FormatTok->is(tok::colon)) {
    // Parse a type definition.
    nextToken();

    // Eat the type declaration. For braced inline object types, balance braces,
    // otherwise just parse until finding an l_brace for the function body.
    if (FormatTok->is(tok::l_brace))
      tryToParseBracedList();
    else
      while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
        nextToken();
  }

  if (FormatTok->is(tok::semi))
    return;

  parseChildBlock();
}

/// Parses the current `{` as a braced list unless it is classified as a block.
/// Brace kinds are calculated first if the kind is still unknown.
///
/// \returns false, consuming nothing, if the brace is a BK_Block; true after
/// the brace and the list have been parsed.
bool UnwrappedLineParser::tryToParseBracedList() {
  if (FormatTok->is(BK_Unknown))
    calculateBraceTypes();
  assert(FormatTok->isNot(BK_Unknown));
  if (FormatTok->is(BK_Block))
    return false;
  nextToken();
  parseBracedList();
  return true;
}

/// Consumes a fat arrow (`=>`) and, if a `{` follows, parses it as a child
/// block.
///
/// \returns true if a child block was parsed; false if something other than
/// `{` follows (the arrow has been consumed either way).
bool UnwrappedLineParser::tryToParseChildBlock() {
  assert(Style.isJavaScript() || Style.isCSharp());
  assert(FormatTok->is(TT_FatArrow));
  // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
  // They always start an expression or a child block if followed by a curly
  // brace.
  nextToken();
  if (FormatTok->isNot(tok::l_brace))
    return false;
  parseChildBlock();
  return true;
}

/// Parses the elements of a braced list (or, for \p IsAngleBracket, an
/// angle-bracket list) starting at the current token, up to and including the
/// closing token. Callers such as tryToParseBracedList() consume the opening
/// `{` before calling.
///
/// \param IsAngleBracket The list is closed by `>` instead of `}`.
/// \param IsEnum The list is an enum body: the closing brace is marked
/// BK_Block and, unless AllowShortEnumsOnASingleLine is set,
/// addUnwrappedLine() is called after each comma and on reaching the closing
/// brace. Must not be combined with \p IsAngleBracket.
/// \returns true if the closing token was found and no stray `;` was seen;
/// false on a `;` in a non-enum, non-JavaScript list or when the end of the
/// file is reached first.
bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) {
  assert(!IsAngleBracket || !IsEnum);
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock()) {
      continue;
    }
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) {
      if (IsEnum) {
        FormatTok->setBlockKind(BK_Block);
        if (!Style.AllowShortEnumsOnASingleLine)
          addUnwrappedLine();
      }
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // Nested angle brackets are only balanced inside an angle-bracket list.
      nextToken();
      if (IsAngleBracket)
        parseBracedList(/*IsAngleBracket=*/true);
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      // Elsewhere a `;` is an error: a non-enum list is abandoned at once,
      // an enum body keeps going but will report failure at its closing brace.
      HasError = true;
      if (!IsEnum)
        return false;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return false;
}

/// \brief Parses a pair of parentheses (and everything between them).
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This applies for all nested scopes as well.
///
/// Returns whether there is a `=` token between the parentheses.
bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  auto *LeftParen = FormatTok;
  bool SeenEqual = false;
  bool MightBeFoldExpr = false;
  // A `{` right after `(` is remembered so that such parentheses are never
  // marked optional below.
  const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      if (parseParens(AmpAmpTokenType))
        SeenEqual = true;
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren: {
      const auto *Prev = LeftParen->Previous;
      // With RemoveParentheses enabled, flag this pair as optional if it is
      // directly nested in another pair (`((...))`) or, for
      // RPS_ReturnStatement, if it wraps the operand of `return`/`co_return`
      // up to the `;`.
      if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
          Style.RemoveParentheses > FormatStyle::RPS_Leave) {
        const auto *Next = Tokens->peekNextToken();
        const bool DoubleParens =
            Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
        const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
        // Double parentheses are kept after `__attribute` and `decltype`, and
        // around an assignment in an `if`, `if constexpr` or `while` header.
        const bool Blacklisted =
            PrevPrev &&
            (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
             (SeenEqual &&
              (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
               PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if))));
        const bool ReturnParens =
            Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
            ((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
             (!NestedLambdas.empty() && !NestedLambdas.back())) &&
            Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
            Next->is(tok::semi);
        if ((DoubleParens && !Blacklisted) || ReturnParens) {
          LeftParen->Optional = true;
          FormatTok->Optional = true;
        }
      }
      if (Prev && Prev->is(TT_TypenameMacro)) {
        LeftParen->setFinalizedType(TT_TypeDeclarationParen);
        FormatTok->setFinalizedType(TT_TypeDeclarationParen);
      }
      nextToken();
      return SeenEqual;
    }
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return SeenEqual;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::ellipsis:
      MightBeFoldExpr = true;
      nextToken();
      break;
    case tok::equal:
      SeenEqual = true;
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    case tok::kw_switch:
      parseSwitch(/*IsExpr=*/true);
      break;
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
        FormatTok->setFinalizedType(AmpAmpTokenType);
      [[fallthrough]];
    default:
      nextToken();
      break;
    }
  } while (!eof());
  return SeenEqual;
}

/// Parses a bracketed sequence up to and including the matching `]`.
///
/// \param LambdaIntroducer If false, the current token must be `[` and a
/// lambda parse is attempted first; if that returns true nothing more is done
/// here. If true, parsing starts directly with the bracket contents.
void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
  if (!LambdaIntroducer) {
    assert(FormatTok->is(tok::l_square) && "'[' expected.");
    if (tryToParseLambda())
      return;
  }
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_square:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside square brackets is an error if there wasn't a matching
      // "{".
      return;
    case tok::l_square:
      parseSquare();
      break;
    case tok::l_brace: {
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    }
    case tok::at:
    case tok::colon:
      // A `{` directly after `@` or `:` is always parsed as a braced list.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// With RemoveBracesLLVM enabled, pushes a new `false` entry onto
/// NestedTooDeep. If the stack already holds at least MaxNestingLevels
/// entries, the entry MaxNestingLevels below the new top is set to true first.
/// Does nothing when RemoveBracesLLVM is disabled.
void UnwrappedLineParser::keepAncestorBraces() {
  if (!Style.RemoveBracesLLVM)
    return;

  const int MaxNestingLevels = 2;
  const int Size = NestedTooDeep.size();
  if (Size >= MaxNestingLevels)
    NestedTooDeep[Size - MaxNestingLevels] = true;
  NestedTooDeep.push_back(false);
}

/// Returns the last token of \p Line that is not a comment, or nullptr if the
/// line has no such token.
static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
  for (const auto &Token : llvm::reverse(Line.Tokens))
    if (Token.Tok->isNot(tok::comment))
      return Token.Tok;

  return nullptr;
}

/// Parses the body of a control statement that is not enclosed in braces: the
/// current line is finished and one structural element is parsed one
/// indentation level deeper.
///
/// When InsertBraces applies, BraceCount is updated on the tokens around the
/// body: it is set to -1 on a token of the line preceding the body, and
/// incremented on the last token of the most recent non-preprocessor line
/// once the body has been parsed.
///
/// \param CheckEOF If true, addUnwrappedLine() is called when the end of the
/// file has been reached after the body.
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  FormatToken *Tok = nullptr;

  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
      PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) {
    Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never
              ? getLastNonComment(*Line)
              : Line->Tokens.back().Tok;
    assert(Tok);
    if (Tok->BraceCount < 0) {
      // Already marked; leave it alone and skip the bookkeeping below.
      assert(Tok->BraceCount == -1);
      Tok = nullptr;
    } else {
      Tok->BraceCount = -1;
    }
  }

  addUnwrappedLine();
  ++Line->Level;
  ++Line->UnbracedBodyLevel;
  parseStructuralElement();
  --Line->UnbracedBodyLevel;

  if (Tok) {
    assert(!Line->InPPDirective);
    Tok = nullptr;
    // Find the most recent line outside a preprocessor directive that has a
    // non-comment token, and count the brace on that line's last token.
    for (const auto &L : llvm::reverse(*CurrentLines)) {
      if (!L.InPPDirective && getLastNonComment(L)) {
        Tok = L.Tokens.back().Tok;
        break;
      }
    }
    assert(Tok);
    ++Tok->BraceCount;
  }

  if (CheckEOF && eof())
    addUnwrappedLine();

  --Line->Level;
}

/// Marks \p LeftBrace and its matching right brace as optional. Does nothing
/// if \p LeftBrace is null or has no MatchingParen.
static void markOptionalBraces(FormatToken *LeftBrace) {
  if (!LeftBrace)
    return;

  assert(LeftBrace->is(tok::l_brace));

  FormatToken *RightBrace = LeftBrace->MatchingParen;
  if (!RightBrace) {
    assert(!LeftBrace->Optional);
    return;
  }

  assert(RightBrace->is(tok::r_brace));
  assert(RightBrace->MatchingParen == LeftBrace);
  assert(LeftBrace->Optional == RightBrace->Optional);

  LeftBrace->Optional = true;
  RightBrace->Optional = true;
}

/// Consumes an attribute at the current position, if there is one: either a
/// single attribute token or a bracketed attribute handled by
/// handleCppAttributes().
void UnwrappedLineParser::handleAttributes() {
  // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
  if (FormatTok->isAttribute())
    nextToken();
  else if (FormatTok->is(tok::l_square))
    handleCppAttributes();
}

/// Parses the bracketed sequence at the current `[` if
/// tryToParseSimpleAttribute() accepts it.
///
/// \returns true if the brackets were parsed; false if nothing was parsed
/// here.
bool UnwrappedLineParser::handleCppAttributes() {
  // Handle [[likely]] / [[unlikely]] attributes.
  assert(FormatTok->is(tok::l_square));
  if (!tryToParseSimpleAttribute())
    return false;
  parseSquare();
  return true;
}

/// Returns whether \c Tok begins a block.
bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
  // FIXME: rename the function or make
  // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
  return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
                           : Tok.is(tok::l_brace);
}

/// Parses an `if` statement together with its optional `else` / `else if`
/// chain. Also used for the Verilog `assert`, `assume`, `cover` and `restrict`
/// statements accepted by the assertion below.
///
/// \param IfKind If non-null and RemoveBracesLLVM is enabled, receives the kind
/// of statement that was parsed (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces If true, the braces of the `if` body are not marked
/// optional by this function.
/// \param IsVerilogAssert The statement is a Verilog assertion, whose `then`
/// action may be omitted.
/// \returns the token that opens the `if` body when RemoveBracesLLVM is enabled
/// and the body is a block; nullptr otherwise.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // For `if consteval` the braces are always kept.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  // Parse the `then` part: a block, nothing at all (Verilog assert followed
  // directly by `else`), or a single unbraced statement.
  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if ... }` whose inner `if` keeps its braces: the braces
        // around the `else` body are marked optional here.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      // `else if`: parse the nested `if` recursively. If a comment separates
      // `else` from `if`, the `if` starts its own line one level deeper.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      // Take this statement's NestedTooDeep entry off the stack while the
      // nested `if` is parsed and put it back afterwards.
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Without RemoveBracesLLVM there is nothing to mark; \p IfKind is left
  // untouched in that case.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the pair so that it is not treated as a
    // matched, removable pair later on.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}

/// Parses a `try` (or `__try`) statement together with its handlers: `catch`,
/// `__except`, `__finally`, Java/JavaScript `finally`, and Objective-C
/// `@catch` / `@finally`. Also handles a function-try-block with a constructor
/// initializer list and Java's try-with-resources.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  bool NeedsUnwrappedLine = false;
  bool HasCtorInitializer = false;
  if (FormatTok->is(tok::colon)) {
    auto *Colon = FormatTok;
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    if (FormatTok->is(tok::identifier)) {
      HasCtorInitializer = true;
      Colon->setFinalizedType(TT_CtorInitializerColon);
    }

    // In case identifiers were removed by clang-tidy, what might follow is
    // multiple commas in sequence - before the first identifier.
    while (FormatTok->is(tok::comma))
      nextToken();

    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }

      // In case identifiers were removed by clang-tidy, what might follow is
      // multiple commas in sequence - after the first identifier.
      while (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
    parseParens();

  keepAncestorBraces();

  if (FormatTok->is(tok::l_brace)) {
    if (HasCtorInitializer)
      FormatTok->setFinalizedType(TT_FunctionLBrace);
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (FormatTok->isNot(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // Parse any number of handlers following the try block.
  while (true) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
      break;
    }
    nextToken();
    // Skip the handler's header up to its `{`; give up on `;`, `}` or EOF.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    Line->MustBeDeclaration = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

/// Parses a namespace declaration (or a namespace macro invocation) and, if a
/// `{` follows, its body. Whether the body adds an indentation level depends
/// on NamespaceIndentation; a `;` after the closing brace is consumed with the
/// block.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    // Skip the namespace name and anything bracketed or parenthesized that
    // appears in front of the opening brace.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period, tok::l_paren) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        nextToken();
    }
  }
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_NamespaceLBrace);

    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // Indent the body for NI_All, or for NI_Inner when more than one
    // declaration scope is open.
    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    bool ManageWhitesmithsBraces =
        AddLevels == 0u &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // If we're in Whitesmiths mode, indent the brace if we're not indenting
    // the whole block.
    if (ManageWhitesmithsBraces)
      ++Line->Level;

    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
               /*KeepBraces=*/true, /*IfKind=*/nullptr,
               ManageWhitesmithsBraces);

    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

    if (ManageWhitesmithsBraces)
      --Line->Level;
  }
  // FIXME: Add error handling.
}

/// Parses a `new` expression.
///
/// For C#, tokens are consumed up to (but not including) the next `;` or `,`,
/// parsing parenthesized and braced parts on the way. For Java, tokens are
/// consumed up to and including the constructor arguments and, if present, an
/// anonymous class body. For other languages only `new` itself is consumed.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    do {
      // Handle constructor invocation, e.g. `new(field: value)`.
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

/// Parses the body of a loop, which is either a block or a single unbraced
/// statement.
///
/// \param KeepBraces If false, the braces of a block body are marked optional
/// unless NestedTooDeep.back() is set, and the top NestedTooDeep entry is
/// popped afterwards.
/// \param WrapRightBrace If true, addUnwrappedLine() is called after a block
/// body.
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
  keepAncestorBraces();

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    FormatToken *LeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepBraces);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (!KeepBraces) {
      assert(!NestedTooDeep.empty());
      if (!NestedTooDeep.back())
        markOptionalBraces(LeftBrace);
    }
    if (WrapRightBrace)
      addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  // NOTE(review): keepAncestorBraces() pushes whenever RemoveBracesLLVM is
  // enabled, but this pop only runs when KeepBraces is false - confirm the
  // stack stays balanced for callers that pass KeepBraces=true.
  if (!KeepBraces)
    NestedTooDeep.pop_back();
}

/// Parses a `for` or `while` loop, a foreach-macro loop, or one of the Verilog
/// constructs accepted by the assertion below.
///
/// \param HasParens Whether a parenthesized header may follow the keyword; if
/// false, a `(` at that position is not parsed as the loop header.
void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
  assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
                              Keywords.kw_always_ff, Keywords.kw_always_latch,
                              Keywords.kw_final, Keywords.kw_initial,
                              Keywords.kw_foreach, Keywords.kw_forever,
                              Keywords.kw_repeat))) &&
         "'for', 'while' or foreach macro expected");
  // Braces are only candidates for removal on plain `for` / `while` loops.
  const bool KeepBraces = !Style.RemoveBracesLLVM ||
                          !FormatTok->isOneOf(tok::kw_for, tok::kw_while);

  nextToken();
  // JS' for await ( ...
  if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
    nextToken();
  if (IsCpp && FormatTok->is(tok::kw_co_await))
    nextToken();
  if (HasParens && FormatTok->is(tok::l_paren)) {
    // The type is only set for Verilog basically because we were afraid to
    // change the existing behavior for loops. See the discussion on D121756 for
    // details.
    if (Style.isVerilog())
      FormatTok->setFinalizedType(TT_ConditionLParen);
    parseParens();
  }

  if (Style.isVerilog()) {
    // Event control.
    parseVerilogSensitivityList();
  } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
             Tokens->getPreviousToken()->is(tok::r_paren)) {
    // Loop with an empty body (`;` right after the header): keep it on the
    // header's line.
    nextToken();
    addUnwrappedLine();
    return;
  }

  handleAttributes();
  parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
}

void UnwrappedLineParser::parseDoWhile() {
  assert(FormatTok->is(tok::kw_do) && "'do' expected");
  nextToken();

  parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);

  // FIXME: Add error handling.
  if (FormatTok->isNot(tok::kw_while)) {
    addUnwrappedLine();
    return;
  }

  FormatTok->setFinalizedType(TT_DoWhile);

  // If in Whitesmiths mode, the line with the while() needs to be indented
  // to the same level as the block.
3244 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3245 ++Line->Level; 3246 3247 nextToken(); 3248 parseStructuralElement(); 3249 } 3250 3251 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3252 nextToken(); 3253 unsigned OldLineLevel = Line->Level; 3254 3255 if (LeftAlignLabel) 3256 Line->Level = 0; 3257 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3258 --Line->Level; 3259 3260 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3261 FormatTok->is(tok::l_brace)) { 3262 3263 CompoundStatementIndenter Indenter(this, Line->Level, 3264 Style.BraceWrapping.AfterCaseLabel, 3265 Style.BraceWrapping.IndentBraces); 3266 parseBlock(); 3267 if (FormatTok->is(tok::kw_break)) { 3268 if (Style.BraceWrapping.AfterControlStatement == 3269 FormatStyle::BWACS_Always) { 3270 addUnwrappedLine(); 3271 if (!Style.IndentCaseBlocks && 3272 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3273 ++Line->Level; 3274 } 3275 } 3276 parseStructuralElement(); 3277 } 3278 addUnwrappedLine(); 3279 } else { 3280 if (FormatTok->is(tok::semi)) 3281 nextToken(); 3282 addUnwrappedLine(); 3283 } 3284 Line->Level = OldLineLevel; 3285 if (FormatTok->isNot(tok::l_brace)) { 3286 parseStructuralElement(); 3287 addUnwrappedLine(); 3288 } 3289 } 3290 3291 void UnwrappedLineParser::parseCaseLabel() { 3292 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3293 auto *Case = FormatTok; 3294 3295 // FIXME: fix handling of complex expressions here. 
3296 do { 3297 nextToken(); 3298 if (FormatTok->is(tok::colon)) { 3299 FormatTok->setFinalizedType(TT_CaseLabelColon); 3300 break; 3301 } 3302 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) { 3303 FormatTok->setFinalizedType(TT_CaseLabelArrow); 3304 Case->setFinalizedType(TT_SwitchExpressionLabel); 3305 break; 3306 } 3307 } while (!eof()); 3308 parseLabel(); 3309 } 3310 3311 void UnwrappedLineParser::parseSwitch(bool IsExpr) { 3312 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3313 nextToken(); 3314 if (FormatTok->is(tok::l_paren)) 3315 parseParens(); 3316 3317 keepAncestorBraces(); 3318 3319 if (FormatTok->is(tok::l_brace)) { 3320 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3321 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace 3322 : TT_ControlStatementLBrace); 3323 if (IsExpr) 3324 parseChildBlock(); 3325 else 3326 parseBlock(); 3327 setPreviousRBraceType(TT_ControlStatementRBrace); 3328 if (!IsExpr) 3329 addUnwrappedLine(); 3330 } else { 3331 addUnwrappedLine(); 3332 ++Line->Level; 3333 parseStructuralElement(); 3334 --Line->Level; 3335 } 3336 3337 if (Style.RemoveBracesLLVM) 3338 NestedTooDeep.pop_back(); 3339 } 3340 3341 // Operators that can follow a C variable. 
3342 static bool isCOperatorFollowingVar(tok::TokenKind Kind) { 3343 switch (Kind) { 3344 case tok::ampamp: 3345 case tok::ampequal: 3346 case tok::arrow: 3347 case tok::caret: 3348 case tok::caretequal: 3349 case tok::comma: 3350 case tok::ellipsis: 3351 case tok::equal: 3352 case tok::equalequal: 3353 case tok::exclaim: 3354 case tok::exclaimequal: 3355 case tok::greater: 3356 case tok::greaterequal: 3357 case tok::greatergreater: 3358 case tok::greatergreaterequal: 3359 case tok::l_paren: 3360 case tok::l_square: 3361 case tok::less: 3362 case tok::lessequal: 3363 case tok::lessless: 3364 case tok::lesslessequal: 3365 case tok::minus: 3366 case tok::minusequal: 3367 case tok::minusminus: 3368 case tok::percent: 3369 case tok::percentequal: 3370 case tok::period: 3371 case tok::pipe: 3372 case tok::pipeequal: 3373 case tok::pipepipe: 3374 case tok::plus: 3375 case tok::plusequal: 3376 case tok::plusplus: 3377 case tok::question: 3378 case tok::r_brace: 3379 case tok::r_paren: 3380 case tok::r_square: 3381 case tok::semi: 3382 case tok::slash: 3383 case tok::slashequal: 3384 case tok::star: 3385 case tok::starequal: 3386 return true; 3387 default: 3388 return false; 3389 } 3390 } 3391 3392 void UnwrappedLineParser::parseAccessSpecifier() { 3393 FormatToken *AccessSpecifierCandidate = FormatTok; 3394 nextToken(); 3395 // Understand Qt's slots. 3396 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3397 nextToken(); 3398 // Otherwise, we don't know what it is, and we'd better keep the next token. 3399 if (FormatTok->is(tok::colon)) { 3400 nextToken(); 3401 addUnwrappedLine(); 3402 } else if (FormatTok->isNot(tok::coloncolon) && 3403 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3404 // Not a variable name nor namespace name. 3405 addUnwrappedLine(); 3406 } else if (AccessSpecifierCandidate) { 3407 // Consider the access specifier to be a C identifier. 
3408 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3409 } 3410 } 3411 3412 /// \brief Parses a requires, decides if it is a clause or an expression. 3413 /// \pre The current token has to be the requires keyword. 3414 /// \returns true if it parsed a clause. 3415 bool UnwrappedLineParser::parseRequires() { 3416 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3417 auto RequiresToken = FormatTok; 3418 3419 // We try to guess if it is a requires clause, or a requires expression. For 3420 // that we first consume the keyword and check the next token. 3421 nextToken(); 3422 3423 switch (FormatTok->Tok.getKind()) { 3424 case tok::l_brace: 3425 // This can only be an expression, never a clause. 3426 parseRequiresExpression(RequiresToken); 3427 return false; 3428 case tok::l_paren: 3429 // Clauses and expression can start with a paren, it's unclear what we have. 3430 break; 3431 default: 3432 // All other tokens can only be a clause. 3433 parseRequiresClause(RequiresToken); 3434 return true; 3435 } 3436 3437 // Looking forward we would have to decide if there are function declaration 3438 // like arguments to the requires expression: 3439 // requires (T t) { 3440 // Or there is a constraint expression for the requires clause: 3441 // requires (C<T> && ... 3442 3443 // But first let's look behind. 3444 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3445 3446 if (!PreviousNonComment || 3447 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3448 // If there is no token, or an expression left brace, we are a requires 3449 // clause within a requires expression. 3450 parseRequiresClause(RequiresToken); 3451 return true; 3452 } 3453 3454 switch (PreviousNonComment->Tok.getKind()) { 3455 case tok::greater: 3456 case tok::r_paren: 3457 case tok::kw_noexcept: 3458 case tok::kw_const: 3459 // This is a requires clause. 
3460 parseRequiresClause(RequiresToken); 3461 return true; 3462 case tok::amp: 3463 case tok::ampamp: { 3464 // This can be either: 3465 // if (... && requires (T t) ...) 3466 // Or 3467 // void member(...) && requires (C<T> ... 3468 // We check the one token before that for a const: 3469 // void member(...) const && requires (C<T> ... 3470 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3471 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3472 parseRequiresClause(RequiresToken); 3473 return true; 3474 } 3475 break; 3476 } 3477 default: 3478 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) { 3479 // This is a requires clause. 3480 parseRequiresClause(RequiresToken); 3481 return true; 3482 } 3483 // It's an expression. 3484 parseRequiresExpression(RequiresToken); 3485 return false; 3486 } 3487 3488 // Now we look forward and try to check if the paren content is a parameter 3489 // list. The parameters can be cv-qualified and contain references or 3490 // pointers. 3491 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3492 // of stuff: typename, const, *, &, &&, ::, identifiers. 3493 3494 unsigned StoredPosition = Tokens->getPosition(); 3495 FormatToken *NextToken = Tokens->getNextToken(); 3496 int Lookahead = 0; 3497 auto PeekNext = [&Lookahead, &NextToken, this] { 3498 ++Lookahead; 3499 NextToken = Tokens->getNextToken(); 3500 }; 3501 3502 bool FoundType = false; 3503 bool LastWasColonColon = false; 3504 int OpenAngles = 0; 3505 3506 for (; Lookahead < 50; PeekNext()) { 3507 switch (NextToken->Tok.getKind()) { 3508 case tok::kw_volatile: 3509 case tok::kw_const: 3510 case tok::comma: 3511 if (OpenAngles == 0) { 3512 FormatTok = Tokens->setPosition(StoredPosition); 3513 parseRequiresExpression(RequiresToken); 3514 return false; 3515 } 3516 break; 3517 case tok::eof: 3518 // Break out of the loop. 
3519 Lookahead = 50; 3520 break; 3521 case tok::coloncolon: 3522 LastWasColonColon = true; 3523 break; 3524 case tok::kw_decltype: 3525 case tok::identifier: 3526 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3527 FormatTok = Tokens->setPosition(StoredPosition); 3528 parseRequiresExpression(RequiresToken); 3529 return false; 3530 } 3531 FoundType = true; 3532 LastWasColonColon = false; 3533 break; 3534 case tok::less: 3535 ++OpenAngles; 3536 break; 3537 case tok::greater: 3538 --OpenAngles; 3539 break; 3540 default: 3541 if (NextToken->isTypeName(LangOpts)) { 3542 FormatTok = Tokens->setPosition(StoredPosition); 3543 parseRequiresExpression(RequiresToken); 3544 return false; 3545 } 3546 break; 3547 } 3548 } 3549 // This seems to be a complicated expression, just assume it's a clause. 3550 FormatTok = Tokens->setPosition(StoredPosition); 3551 parseRequiresClause(RequiresToken); 3552 return true; 3553 } 3554 3555 /// \brief Parses a requires clause. 3556 /// \param RequiresToken The requires keyword token, which starts this clause. 3557 /// \pre We need to be on the next token after the requires keyword. 3558 /// \sa parseRequiresExpression 3559 /// 3560 /// Returns if it either has finished parsing the clause, or it detects, that 3561 /// the clause is incorrect. 3562 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3563 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3564 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3565 3566 // If there is no previous token, we are within a requires expression, 3567 // otherwise we will always have the template or function declaration in front 3568 // of it. 3569 bool InRequiresExpression = 3570 !RequiresToken->Previous || 3571 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3572 3573 RequiresToken->setFinalizedType(InRequiresExpression 3574 ? 
TT_RequiresClauseInARequiresExpression 3575 : TT_RequiresClause); 3576 3577 // NOTE: parseConstraintExpression is only ever called from this function. 3578 // It could be inlined into here. 3579 parseConstraintExpression(); 3580 3581 if (!InRequiresExpression) 3582 FormatTok->Previous->ClosesRequiresClause = true; 3583 } 3584 3585 /// \brief Parses a requires expression. 3586 /// \param RequiresToken The requires keyword token, which starts this clause. 3587 /// \pre We need to be on the next token after the requires keyword. 3588 /// \sa parseRequiresClause 3589 /// 3590 /// Returns if it either has finished parsing the expression, or it detects, 3591 /// that the expression is incorrect. 3592 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) { 3593 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3594 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3595 3596 RequiresToken->setFinalizedType(TT_RequiresExpression); 3597 3598 if (FormatTok->is(tok::l_paren)) { 3599 FormatTok->setFinalizedType(TT_RequiresExpressionLParen); 3600 parseParens(); 3601 } 3602 3603 if (FormatTok->is(tok::l_brace)) { 3604 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace); 3605 parseChildBlock(); 3606 } 3607 } 3608 3609 /// \brief Parses a constraint expression. 3610 /// 3611 /// This is the body of a requires clause. It returns, when the parsing is 3612 /// complete, or the expression is incorrect. 3613 void UnwrappedLineParser::parseConstraintExpression() { 3614 // The special handling for lambdas is needed since tryToParseLambda() eats a 3615 // token and if a requires expression is the last part of a requires clause 3616 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is 3617 // not set on the correct token. Thus we need to be aware if we even expect a 3618 // lambda to be possible. 3619 // template <typename T> requires requires { ... 
} [[nodiscard]] ...; 3620 bool LambdaNextTimeAllowed = true; 3621 3622 // Within lambda declarations, it is permitted to put a requires clause after 3623 // its template parameter list, which would place the requires clause right 3624 // before the parentheses of the parameters of the lambda declaration. Thus, 3625 // we track if we expect to see grouping parentheses at all. 3626 // Without this check, `requires foo<T> (T t)` in the below example would be 3627 // seen as the whole requires clause, accidentally eating the parameters of 3628 // the lambda. 3629 // [&]<typename T> requires foo<T> (T t) { ... }; 3630 bool TopLevelParensAllowed = true; 3631 3632 do { 3633 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false); 3634 3635 switch (FormatTok->Tok.getKind()) { 3636 case tok::kw_requires: { 3637 auto RequiresToken = FormatTok; 3638 nextToken(); 3639 parseRequiresExpression(RequiresToken); 3640 break; 3641 } 3642 3643 case tok::l_paren: 3644 if (!TopLevelParensAllowed) 3645 return; 3646 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator); 3647 TopLevelParensAllowed = false; 3648 break; 3649 3650 case tok::l_square: 3651 if (!LambdaThisTimeAllowed || !tryToParseLambda()) 3652 return; 3653 break; 3654 3655 case tok::kw_const: 3656 case tok::semi: 3657 case tok::kw_class: 3658 case tok::kw_struct: 3659 case tok::kw_union: 3660 return; 3661 3662 case tok::l_brace: 3663 // Potential function body. 
3664 return; 3665 3666 case tok::ampamp: 3667 case tok::pipepipe: 3668 FormatTok->setFinalizedType(TT_BinaryOperator); 3669 nextToken(); 3670 LambdaNextTimeAllowed = true; 3671 TopLevelParensAllowed = true; 3672 break; 3673 3674 case tok::comma: 3675 case tok::comment: 3676 LambdaNextTimeAllowed = LambdaThisTimeAllowed; 3677 nextToken(); 3678 break; 3679 3680 case tok::kw_sizeof: 3681 case tok::greater: 3682 case tok::greaterequal: 3683 case tok::greatergreater: 3684 case tok::less: 3685 case tok::lessequal: 3686 case tok::lessless: 3687 case tok::equalequal: 3688 case tok::exclaim: 3689 case tok::exclaimequal: 3690 case tok::plus: 3691 case tok::minus: 3692 case tok::star: 3693 case tok::slash: 3694 LambdaNextTimeAllowed = true; 3695 TopLevelParensAllowed = true; 3696 // Just eat them. 3697 nextToken(); 3698 break; 3699 3700 case tok::numeric_constant: 3701 case tok::coloncolon: 3702 case tok::kw_true: 3703 case tok::kw_false: 3704 TopLevelParensAllowed = false; 3705 // Just eat them. 3706 nextToken(); 3707 break; 3708 3709 case tok::kw_static_cast: 3710 case tok::kw_const_cast: 3711 case tok::kw_reinterpret_cast: 3712 case tok::kw_dynamic_cast: 3713 nextToken(); 3714 if (FormatTok->isNot(tok::less)) 3715 return; 3716 3717 nextToken(); 3718 parseBracedList(/*IsAngleBracket=*/true); 3719 break; 3720 3721 default: 3722 if (!FormatTok->Tok.getIdentifierInfo()) { 3723 // Identifiers are part of the default case, we check for more then 3724 // tok::identifier to handle builtin type traits. 3725 return; 3726 } 3727 3728 // We need to differentiate identifiers for a template deduction guide, 3729 // variables, or function return types (the constraint expression has 3730 // ended before that), and basically all other cases. But it's easier to 3731 // check the other way around. 3732 assert(FormatTok->Previous); 3733 switch (FormatTok->Previous->Tok.getKind()) { 3734 case tok::coloncolon: // Nested identifier. 
3735 case tok::ampamp: // Start of a function or variable for the 3736 case tok::pipepipe: // constraint expression. (binary) 3737 case tok::exclaim: // The same as above, but unary. 3738 case tok::kw_requires: // Initial identifier of a requires clause. 3739 case tok::equal: // Initial identifier of a concept declaration. 3740 break; 3741 default: 3742 return; 3743 } 3744 3745 // Read identifier with optional template declaration. 3746 nextToken(); 3747 if (FormatTok->is(tok::less)) { 3748 nextToken(); 3749 parseBracedList(/*IsAngleBracket=*/true); 3750 } 3751 TopLevelParensAllowed = false; 3752 break; 3753 } 3754 } while (!eof()); 3755 } 3756 3757 bool UnwrappedLineParser::parseEnum() { 3758 const FormatToken &InitialToken = *FormatTok; 3759 3760 // Won't be 'enum' for NS_ENUMs. 3761 if (FormatTok->is(tok::kw_enum)) 3762 nextToken(); 3763 3764 // In TypeScript, "enum" can also be used as property name, e.g. in interface 3765 // declarations. An "enum" keyword followed by a colon would be a syntax 3766 // error and thus assume it is just an identifier. 3767 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question)) 3768 return false; 3769 3770 // In protobuf, "enum" can be used as a field name. 3771 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal)) 3772 return false; 3773 3774 if (IsCpp) { 3775 // Eat up enum class ... 3776 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct)) 3777 nextToken(); 3778 while (FormatTok->is(tok::l_square)) 3779 if (!handleCppAttributes()) 3780 return false; 3781 } 3782 3783 while (FormatTok->Tok.getIdentifierInfo() || 3784 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 3785 tok::greater, tok::comma, tok::question, 3786 tok::l_square)) { 3787 if (Style.isVerilog()) { 3788 FormatTok->setFinalizedType(TT_VerilogDimensionedTypeName); 3789 nextToken(); 3790 // In Verilog the base type can have dimensions. 
3791 while (FormatTok->is(tok::l_square)) 3792 parseSquare(); 3793 } else { 3794 nextToken(); 3795 } 3796 // We can have macros or attributes in between 'enum' and the enum name. 3797 if (FormatTok->is(tok::l_paren)) 3798 parseParens(); 3799 if (FormatTok->is(tok::identifier)) { 3800 nextToken(); 3801 // If there are two identifiers in a row, this is likely an elaborate 3802 // return type. In Java, this can be "implements", etc. 3803 if (IsCpp && FormatTok->is(tok::identifier)) 3804 return false; 3805 } 3806 } 3807 3808 // Just a declaration or something is wrong. 3809 if (FormatTok->isNot(tok::l_brace)) 3810 return true; 3811 FormatTok->setFinalizedType(TT_EnumLBrace); 3812 FormatTok->setBlockKind(BK_Block); 3813 3814 if (Style.Language == FormatStyle::LK_Java) { 3815 // Java enums are different. 3816 parseJavaEnumBody(); 3817 return true; 3818 } 3819 if (Style.Language == FormatStyle::LK_Proto) { 3820 parseBlock(/*MustBeDeclaration=*/true); 3821 return true; 3822 } 3823 3824 if (!Style.AllowShortEnumsOnASingleLine && 3825 ShouldBreakBeforeBrace(Style, InitialToken)) { 3826 addUnwrappedLine(); 3827 } 3828 // Parse enum body. 3829 nextToken(); 3830 if (!Style.AllowShortEnumsOnASingleLine) { 3831 addUnwrappedLine(); 3832 Line->Level += 1; 3833 } 3834 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true); 3835 if (!Style.AllowShortEnumsOnASingleLine) 3836 Line->Level -= 1; 3837 if (HasError) { 3838 if (FormatTok->is(tok::semi)) 3839 nextToken(); 3840 addUnwrappedLine(); 3841 } 3842 setPreviousRBraceType(TT_EnumRBrace); 3843 return true; 3844 3845 // There is no addUnwrappedLine() here so that we fall through to parsing a 3846 // structural element afterwards. Thus, in "enum A {} n, m;", 3847 // "} n, m;" will end up in one unwrapped line. 
3848 } 3849 3850 bool UnwrappedLineParser::parseStructLike() { 3851 // parseRecord falls through and does not yet add an unwrapped line as a 3852 // record declaration or definition can start a structural element. 3853 parseRecord(); 3854 // This does not apply to Java, JavaScript and C#. 3855 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3856 Style.isCSharp()) { 3857 if (FormatTok->is(tok::semi)) 3858 nextToken(); 3859 addUnwrappedLine(); 3860 return true; 3861 } 3862 return false; 3863 } 3864 3865 namespace { 3866 // A class used to set and restore the Token position when peeking 3867 // ahead in the token source. 3868 class ScopedTokenPosition { 3869 unsigned StoredPosition; 3870 FormatTokenSource *Tokens; 3871 3872 public: 3873 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3874 assert(Tokens && "Tokens expected to not be null"); 3875 StoredPosition = Tokens->getPosition(); 3876 } 3877 3878 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3879 }; 3880 } // namespace 3881 3882 // Look to see if we have [[ by looking ahead, if 3883 // its not then rewind to the original position. 3884 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3885 ScopedTokenPosition AutoPosition(Tokens); 3886 FormatToken *Tok = Tokens->getNextToken(); 3887 // We already read the first [ check for the second. 3888 if (Tok->isNot(tok::l_square)) 3889 return false; 3890 // Double check that the attribute is just something 3891 // fairly simple. 
3892 while (Tok->isNot(tok::eof)) { 3893 if (Tok->is(tok::r_square)) 3894 break; 3895 Tok = Tokens->getNextToken(); 3896 } 3897 if (Tok->is(tok::eof)) 3898 return false; 3899 Tok = Tokens->getNextToken(); 3900 if (Tok->isNot(tok::r_square)) 3901 return false; 3902 Tok = Tokens->getNextToken(); 3903 if (Tok->is(tok::semi)) 3904 return false; 3905 return true; 3906 } 3907 3908 void UnwrappedLineParser::parseJavaEnumBody() { 3909 assert(FormatTok->is(tok::l_brace)); 3910 const FormatToken *OpeningBrace = FormatTok; 3911 3912 // Determine whether the enum is simple, i.e. does not have a semicolon or 3913 // constants with class bodies. Simple enums can be formatted like braced 3914 // lists, contracted to a single line, etc. 3915 unsigned StoredPosition = Tokens->getPosition(); 3916 bool IsSimple = true; 3917 FormatToken *Tok = Tokens->getNextToken(); 3918 while (Tok->isNot(tok::eof)) { 3919 if (Tok->is(tok::r_brace)) 3920 break; 3921 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3922 IsSimple = false; 3923 break; 3924 } 3925 // FIXME: This will also mark enums with braces in the arguments to enum 3926 // constants as "not simple". This is probably fine in practice, though. 3927 Tok = Tokens->getNextToken(); 3928 } 3929 FormatTok = Tokens->setPosition(StoredPosition); 3930 3931 if (IsSimple) { 3932 nextToken(); 3933 parseBracedList(); 3934 addUnwrappedLine(); 3935 return; 3936 } 3937 3938 // Parse the body of a more complex enum. 3939 // First add a line for everything up to the "{". 3940 nextToken(); 3941 addUnwrappedLine(); 3942 ++Line->Level; 3943 3944 // Parse the enum constants. 3945 while (!eof()) { 3946 if (FormatTok->is(tok::l_brace)) { 3947 // Parse the constant's class body. 
3948 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3949 /*MunchSemi=*/false); 3950 } else if (FormatTok->is(tok::l_paren)) { 3951 parseParens(); 3952 } else if (FormatTok->is(tok::comma)) { 3953 nextToken(); 3954 addUnwrappedLine(); 3955 } else if (FormatTok->is(tok::semi)) { 3956 nextToken(); 3957 addUnwrappedLine(); 3958 break; 3959 } else if (FormatTok->is(tok::r_brace)) { 3960 addUnwrappedLine(); 3961 break; 3962 } else { 3963 nextToken(); 3964 } 3965 } 3966 3967 // Parse the class body after the enum's ";" if any. 3968 parseLevel(OpeningBrace); 3969 nextToken(); 3970 --Line->Level; 3971 addUnwrappedLine(); 3972 } 3973 3974 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3975 const FormatToken &InitialToken = *FormatTok; 3976 nextToken(); 3977 3978 const FormatToken *ClassName = nullptr; 3979 bool IsDerived = false; 3980 auto IsNonMacroIdentifier = [](const FormatToken *Tok) { 3981 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper(); 3982 }; 3983 // JavaScript/TypeScript supports anonymous classes like: 3984 // a = class extends foo { } 3985 bool JSPastExtendsOrImplements = false; 3986 // The actual identifier can be a nested name specifier, and in macros 3987 // it is often token-pasted. 3988 // An [[attribute]] can be before the identifier. 
3989 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3990 tok::kw_alignas, tok::l_square) || 3991 FormatTok->isAttribute() || 3992 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3993 FormatTok->isOneOf(tok::period, tok::comma))) { 3994 if (Style.isJavaScript() && 3995 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3996 JSPastExtendsOrImplements = true; 3997 // JavaScript/TypeScript supports inline object types in 3998 // extends/implements positions: 3999 // class Foo implements {bar: number} { } 4000 nextToken(); 4001 if (FormatTok->is(tok::l_brace)) { 4002 tryToParseBracedList(); 4003 continue; 4004 } 4005 } 4006 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 4007 continue; 4008 const auto *Previous = FormatTok; 4009 nextToken(); 4010 switch (FormatTok->Tok.getKind()) { 4011 case tok::l_paren: 4012 // We can have macros in between 'class' and the class name. 4013 if (!IsNonMacroIdentifier(Previous) || 4014 // e.g. 
`struct macro(a) S { int i; };` 4015 Previous->Previous == &InitialToken) { 4016 parseParens(); 4017 } 4018 break; 4019 case tok::coloncolon: 4020 case tok::hashhash: 4021 break; 4022 default: 4023 if (!JSPastExtendsOrImplements && !ClassName && 4024 Previous->is(tok::identifier) && Previous->isNot(TT_AttributeMacro)) { 4025 ClassName = Previous; 4026 } 4027 } 4028 } 4029 4030 auto IsListInitialization = [&] { 4031 if (!ClassName || IsDerived) 4032 return false; 4033 assert(FormatTok->is(tok::l_brace)); 4034 const auto *Prev = FormatTok->getPreviousNonComment(); 4035 assert(Prev); 4036 return Prev != ClassName && Prev->is(tok::identifier) && 4037 Prev->isNot(Keywords.kw_final) && tryToParseBracedList(); 4038 }; 4039 4040 if (FormatTok->isOneOf(tok::colon, tok::less)) { 4041 int AngleNestingLevel = 0; 4042 do { 4043 if (FormatTok->is(tok::less)) 4044 ++AngleNestingLevel; 4045 else if (FormatTok->is(tok::greater)) 4046 --AngleNestingLevel; 4047 4048 if (AngleNestingLevel == 0) { 4049 if (FormatTok->is(tok::colon)) { 4050 IsDerived = true; 4051 } else if (FormatTok->is(tok::identifier) && 4052 FormatTok->Previous->is(tok::coloncolon)) { 4053 ClassName = FormatTok; 4054 } else if (FormatTok->is(tok::l_paren) && 4055 IsNonMacroIdentifier(FormatTok->Previous)) { 4056 break; 4057 } 4058 } 4059 if (FormatTok->is(tok::l_brace)) { 4060 if (AngleNestingLevel == 0 && IsListInitialization()) 4061 return; 4062 calculateBraceTypes(/*ExpectClassBody=*/true); 4063 if (!tryToParseBracedList()) 4064 break; 4065 } 4066 if (FormatTok->is(tok::l_square)) { 4067 FormatToken *Previous = FormatTok->Previous; 4068 if (!Previous || (Previous->isNot(tok::r_paren) && 4069 !Previous->isTypeOrIdentifier(LangOpts))) { 4070 // Don't try parsing a lambda if we had a closing parenthesis before, 4071 // it was probably a pointer to an array: int (*)[]. 
4072 if (!tryToParseLambda()) 4073 continue; 4074 } else { 4075 parseSquare(); 4076 continue; 4077 } 4078 } 4079 if (FormatTok->is(tok::semi)) 4080 return; 4081 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 4082 addUnwrappedLine(); 4083 nextToken(); 4084 parseCSharpGenericTypeConstraint(); 4085 break; 4086 } 4087 nextToken(); 4088 } while (!eof()); 4089 } 4090 4091 auto GetBraceTypes = 4092 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> { 4093 switch (RecordTok.Tok.getKind()) { 4094 case tok::kw_class: 4095 return {TT_ClassLBrace, TT_ClassRBrace}; 4096 case tok::kw_struct: 4097 return {TT_StructLBrace, TT_StructRBrace}; 4098 case tok::kw_union: 4099 return {TT_UnionLBrace, TT_UnionRBrace}; 4100 default: 4101 // Useful for e.g. interface. 4102 return {TT_RecordLBrace, TT_RecordRBrace}; 4103 } 4104 }; 4105 if (FormatTok->is(tok::l_brace)) { 4106 if (IsListInitialization()) 4107 return; 4108 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken); 4109 FormatTok->setFinalizedType(OpenBraceType); 4110 if (ParseAsExpr) { 4111 parseChildBlock(); 4112 } else { 4113 if (ShouldBreakBeforeBrace(Style, InitialToken)) 4114 addUnwrappedLine(); 4115 4116 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 4117 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 4118 } 4119 setPreviousRBraceType(ClosingBraceType); 4120 } 4121 // There is no addUnwrappedLine() here so that we fall through to parsing a 4122 // structural element afterwards. Thus, in "class A {} n, m;", 4123 // "} n, m;" will end up in one unwrapped line. 
4124 } 4125 4126 void UnwrappedLineParser::parseObjCMethod() { 4127 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 4128 "'(' or identifier expected."); 4129 do { 4130 if (FormatTok->is(tok::semi)) { 4131 nextToken(); 4132 addUnwrappedLine(); 4133 return; 4134 } else if (FormatTok->is(tok::l_brace)) { 4135 if (Style.BraceWrapping.AfterFunction) 4136 addUnwrappedLine(); 4137 parseBlock(); 4138 addUnwrappedLine(); 4139 return; 4140 } else { 4141 nextToken(); 4142 } 4143 } while (!eof()); 4144 } 4145 4146 void UnwrappedLineParser::parseObjCProtocolList() { 4147 assert(FormatTok->is(tok::less) && "'<' expected."); 4148 do { 4149 nextToken(); 4150 // Early exit in case someone forgot a close angle. 4151 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4152 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4153 return; 4154 } 4155 } while (!eof() && FormatTok->isNot(tok::greater)); 4156 nextToken(); // Skip '>'. 4157 } 4158 4159 void UnwrappedLineParser::parseObjCUntilAtEnd() { 4160 do { 4161 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 4162 nextToken(); 4163 addUnwrappedLine(); 4164 break; 4165 } 4166 if (FormatTok->is(tok::l_brace)) { 4167 parseBlock(); 4168 // In ObjC interfaces, nothing should be following the "}". 4169 addUnwrappedLine(); 4170 } else if (FormatTok->is(tok::r_brace)) { 4171 // Ignore stray "}". parseStructuralElement doesn't consume them. 
4172 nextToken(); 4173 addUnwrappedLine(); 4174 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 4175 nextToken(); 4176 parseObjCMethod(); 4177 } else { 4178 parseStructuralElement(); 4179 } 4180 } while (!eof()); 4181 } 4182 4183 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 4184 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 4185 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 4186 nextToken(); 4187 nextToken(); // interface name 4188 4189 // @interface can be followed by a lightweight generic 4190 // specialization list, then either a base class or a category. 4191 if (FormatTok->is(tok::less)) 4192 parseObjCLightweightGenerics(); 4193 if (FormatTok->is(tok::colon)) { 4194 nextToken(); 4195 nextToken(); // base class name 4196 // The base class can also have lightweight generics applied to it. 4197 if (FormatTok->is(tok::less)) 4198 parseObjCLightweightGenerics(); 4199 } else if (FormatTok->is(tok::l_paren)) { 4200 // Skip category, if present. 4201 parseParens(); 4202 } 4203 4204 if (FormatTok->is(tok::less)) 4205 parseObjCProtocolList(); 4206 4207 if (FormatTok->is(tok::l_brace)) { 4208 if (Style.BraceWrapping.AfterObjCDeclaration) 4209 addUnwrappedLine(); 4210 parseBlock(/*MustBeDeclaration=*/true); 4211 } 4212 4213 // With instance variables, this puts '}' on its own line. Without instance 4214 // variables, this ends the @interface line. 4215 addUnwrappedLine(); 4216 4217 parseObjCUntilAtEnd(); 4218 } 4219 4220 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4221 assert(FormatTok->is(tok::less)); 4222 // Unlike protocol lists, generic parameterizations support 4223 // nested angles: 4224 // 4225 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4226 // NSObject <NSCopying, NSSecureCoding> 4227 // 4228 // so we need to count how many open angles we have left. 
4229 unsigned NumOpenAngles = 1; 4230 do { 4231 nextToken(); 4232 // Early exit in case someone forgot a close angle. 4233 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4234 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4235 break; 4236 } 4237 if (FormatTok->is(tok::less)) { 4238 ++NumOpenAngles; 4239 } else if (FormatTok->is(tok::greater)) { 4240 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4241 --NumOpenAngles; 4242 } 4243 } while (!eof() && NumOpenAngles != 0); 4244 nextToken(); // Skip '>'. 4245 } 4246 4247 // Returns true for the declaration/definition form of @protocol, 4248 // false for the expression form. 4249 bool UnwrappedLineParser::parseObjCProtocol() { 4250 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4251 nextToken(); 4252 4253 if (FormatTok->is(tok::l_paren)) { 4254 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4255 return false; 4256 } 4257 4258 // The definition/declaration form, 4259 // @protocol Foo 4260 // - (int)someMethod; 4261 // @end 4262 4263 nextToken(); // protocol name 4264 4265 if (FormatTok->is(tok::less)) 4266 parseObjCProtocolList(); 4267 4268 // Check for protocol declaration. 4269 if (FormatTok->is(tok::semi)) { 4270 nextToken(); 4271 addUnwrappedLine(); 4272 return true; 4273 } 4274 4275 addUnwrappedLine(); 4276 parseObjCUntilAtEnd(); 4277 return true; 4278 } 4279 4280 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4281 bool IsImport = FormatTok->is(Keywords.kw_import); 4282 assert(IsImport || FormatTok->is(tok::kw_export)); 4283 nextToken(); 4284 4285 // Consume the "default" in "export default class/function". 4286 if (FormatTok->is(tok::kw_default)) 4287 nextToken(); 4288 4289 // Consume "async function", "function" and "default function", so that these 4290 // get parsed as free-standing JS functions, i.e. do not require a trailing 4291 // semicolon. 
4292 if (FormatTok->is(Keywords.kw_async)) 4293 nextToken(); 4294 if (FormatTok->is(Keywords.kw_function)) { 4295 nextToken(); 4296 return; 4297 } 4298 4299 // For imports, `export *`, `export {...}`, consume the rest of the line up 4300 // to the terminating `;`. For everything else, just return and continue 4301 // parsing the structural element, i.e. the declaration or expression for 4302 // `export default`. 4303 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4304 !FormatTok->isStringLiteral() && 4305 !(FormatTok->is(Keywords.kw_type) && 4306 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4307 return; 4308 } 4309 4310 while (!eof()) { 4311 if (FormatTok->is(tok::semi)) 4312 return; 4313 if (Line->Tokens.empty()) { 4314 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4315 // import statement should terminate. 4316 return; 4317 } 4318 if (FormatTok->is(tok::l_brace)) { 4319 FormatTok->setBlockKind(BK_Block); 4320 nextToken(); 4321 parseBracedList(); 4322 } else { 4323 nextToken(); 4324 } 4325 } 4326 } 4327 4328 void UnwrappedLineParser::parseStatementMacro() { 4329 nextToken(); 4330 if (FormatTok->is(tok::l_paren)) 4331 parseParens(); 4332 if (FormatTok->is(tok::semi)) 4333 nextToken(); 4334 addUnwrappedLine(); 4335 } 4336 4337 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4338 // consume things like a::`b.c[d:e] or a::* 4339 while (true) { 4340 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4341 tok::coloncolon, tok::hash) || 4342 Keywords.isVerilogIdentifier(*FormatTok)) { 4343 nextToken(); 4344 } else if (FormatTok->is(tok::l_square)) { 4345 parseSquare(); 4346 } else { 4347 break; 4348 } 4349 } 4350 } 4351 4352 void UnwrappedLineParser::parseVerilogSensitivityList() { 4353 if (FormatTok->isNot(tok::at)) 4354 return; 4355 nextToken(); 4356 // A block event expression has 2 at signs. 
// Consumes a Verilog event control introduced by '@', if one starts at the
// current token: '@' (or '@@'), followed by '*', a parenthesized list, or a
// hierarchical identifier. Does nothing if the current token is not '@'.
void UnwrappedLineParser::parseVerilogSensitivityList() {
  if (FormatTok->isNot(tok::at))
    return;
  nextToken();
  // A block event expression has 2 at signs.
  if (FormatTok->is(tok::at))
    nextToken();
  switch (FormatTok->Tok.getKind()) {
  case tok::star:
    nextToken();
    break;
  case tok::l_paren:
    parseParens();
    break;
  default:
    parseVerilogHierarchyIdentifier();
    break;
  }
}

// Parses the header of a Verilog hierarchy construct, starting at its
// introducing keyword. Three kinds of headers are distinguished:
//  - `clocking` blocks,
//  - case-like statements (`case`, `casex`, `casez`, `randcase`,
//    `randsequence`),
//  - everything else ("module" etc.), whose header may continue over several
//    unwrapped lines (imports, parameter and port lists, extends/implements,
//    coverage event).
//
// Returns the number of additional indentation levels for the body; this is
// only non-zero for case-like statements when Style.IndentCaseLabels is set.
unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() {
  unsigned AddLevels = 0;

  if (FormatTok->is(Keywords.kw_clocking)) {
    // clocking [name] [@event] [;]
    nextToken();
    if (Keywords.isVerilogIdentifier(*FormatTok))
      nextToken();
    parseVerilogSensitivityList();
    if (FormatTok->is(tok::semi))
      nextToken();
  } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex,
                                Keywords.kw_casez, Keywords.kw_randcase,
                                Keywords.kw_randsequence)) {
    if (Style.IndentCaseLabels)
      AddLevels++;
    nextToken();
    if (FormatTok->is(tok::l_paren)) {
      // The parenthesized expression after the keyword is the condition.
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
    if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches))
      nextToken();
    // The case header has no semicolon.
  } else {
    // "module" etc.
    nextToken();
    // all the words like the name of the module and specifiers like
    // "automatic" and the width of function return type
    while (true) {
      if (FormatTok->is(tok::l_square)) {
        // An identifier directly followed by '[' is a type name with a
        // dimension; mark it before consuming the brackets.
        auto Prev = FormatTok->getPreviousNonComment();
        if (Prev && Keywords.isVerilogIdentifier(*Prev))
          Prev->setFinalizedType(TT_VerilogDimensionedTypeName);
        parseSquare();
      } else if (Keywords.isVerilogIdentifier(*FormatTok) ||
                 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) {
        nextToken();
      } else {
        break;
      }
    }

    // Ends the current unwrapped line and flags the line that follows as a
    // continuation of it.
    auto NewLine = [this]() {
      addUnwrappedLine();
      Line->IsContinuation = true;
    };

    // package imports
    while (FormatTok->is(Keywords.kw_import)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::semi))
        nextToken();
    }

    // parameters and ports
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      NewLine();
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
        parseParens();
      }
    }
    if (FormatTok->is(tok::l_paren)) {
      NewLine();
      FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen);
      parseParens();
    }

    // extends and implements
    if (FormatTok->is(Keywords.kw_extends)) {
      NewLine();
      nextToken();
      parseVerilogHierarchyIdentifier();
      if (FormatTok->is(tok::l_paren))
        parseParens();
    }
    if (FormatTok->is(Keywords.kw_implements)) {
      NewLine();
      // Comma-separated list of identifiers; the first iteration consumes
      // the `implements` keyword, later ones consume the commas.
      do {
        nextToken();
        parseVerilogHierarchyIdentifier();
      } while (FormatTok->is(tok::comma));
    }

    // Coverage event for cover groups.
    if (FormatTok->is(tok::at)) {
      NewLine();
      parseVerilogSensitivityList();
    }

    // The ';' ending the header is read with a level difference of 1 before
    // the header line is finished.
    if (FormatTok->is(tok::semi))
      nextToken(/*LevelDifference=*/1);
    addUnwrappedLine();
  }

  return AddLevels;
}

// Parses a Verilog `table` ... end-keyword construct. The `table` keyword
// gets its own unwrapped line, every ';' inside ends an unwrapped line one
// level deeper, and the closing keyword is placed back on the original level.
void UnwrappedLineParser::parseVerilogTable() {
  assert(FormatTok->is(Keywords.kw_table));
  nextToken(/*LevelDifference=*/1);
  addUnwrappedLine();

  // Remember the current level and indent the table rows by one.
  auto InitialLevel = Line->Level++;
  while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) {
    // Keep a handle on the token being consumed: nextToken() advances
    // FormatTok, and the checks below apply to the consumed token.
    FormatToken *Tok = FormatTok;
    nextToken();
    if (Tok->is(tok::semi))
      addUnwrappedLine();
    else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus))
      Tok->setFinalizedType(TT_VerilogTableItem);
  }
  Line->Level = InitialLevel;
  // Consume the closing keyword and finish its line.
  nextToken(/*LevelDifference=*/-1);
  addUnwrappedLine();
}

// Parses the structural element at a Verilog case label while temporarily
// adjusting the indentation level; the original level is restored afterwards.
void UnwrappedLineParser::parseVerilogCaseLabel() {
  // The label will get unindented in AnnotatingParser. If there are no leading
  // spaces, indent the rest here so that things inside the block will be
  // indented relative to things outside. We don't use parseLabel because we
  // don't know whether this colon is a label or a ternary expression at this
  // point.
  auto OrigLevel = Line->Level;
  // Index the next finished line will get in CurrentLines, i.e. the line that
  // will contain the label.
  auto FirstLine = CurrentLines->size();
  if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1))
    ++Line->Level;
  else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok))
    --Line->Level;
  parseStructuralElement();
  // Restore the indentation in both the new line and the line that has the
  // label.
  if (CurrentLines->size() > FirstLine)
    (*CurrentLines)[FirstLine].Level = OrigLevel;
  Line->Level = OrigLevel;
}
4507 if (CurrentLines->size() > FirstLine) 4508 (*CurrentLines)[FirstLine].Level = OrigLevel; 4509 Line->Level = OrigLevel; 4510 } 4511 4512 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4513 for (const auto &N : Line.Tokens) { 4514 if (N.Tok->MacroCtx) 4515 return true; 4516 for (const UnwrappedLine &Child : N.Children) 4517 if (containsExpansion(Child)) 4518 return true; 4519 } 4520 return false; 4521 } 4522 4523 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4524 if (Line->Tokens.empty()) 4525 return; 4526 LLVM_DEBUG({ 4527 if (!parsingPPDirective()) { 4528 llvm::dbgs() << "Adding unwrapped line:\n"; 4529 printDebugInfo(*Line); 4530 } 4531 }); 4532 4533 // If this line closes a block when in Whitesmiths mode, remember that 4534 // information so that the level can be decreased after the line is added. 4535 // This has to happen after the addition of the line since the line itself 4536 // needs to be indented. 4537 bool ClosesWhitesmithsBlock = 4538 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4539 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4540 4541 // If the current line was expanded from a macro call, we use it to 4542 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4543 // line and the unexpanded token stream. 4544 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4545 if (!Reconstruct) 4546 Reconstruct.emplace(Line->Level, Unexpanded); 4547 Reconstruct->addLine(*Line); 4548 4549 // While the reconstructed unexpanded lines are stored in the normal 4550 // flow of lines, the expanded lines are stored on the side to be analyzed 4551 // in an extra step. 
4552 CurrentExpandedLines.push_back(std::move(*Line)); 4553 4554 if (Reconstruct->finished()) { 4555 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4556 assert(!Reconstructed.Tokens.empty() && 4557 "Reconstructed must at least contain the macro identifier."); 4558 assert(!parsingPPDirective()); 4559 LLVM_DEBUG({ 4560 llvm::dbgs() << "Adding unexpanded line:\n"; 4561 printDebugInfo(Reconstructed); 4562 }); 4563 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4564 Lines.push_back(std::move(Reconstructed)); 4565 CurrentExpandedLines.clear(); 4566 Reconstruct.reset(); 4567 } 4568 } else { 4569 // At the top level we only get here when no unexpansion is going on, or 4570 // when conditional formatting led to unfinished macro reconstructions. 4571 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4572 CurrentLines->push_back(std::move(*Line)); 4573 } 4574 Line->Tokens.clear(); 4575 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4576 Line->FirstStartColumn = 0; 4577 Line->IsContinuation = false; 4578 Line->SeenDecltypeAuto = false; 4579 4580 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4581 --Line->Level; 4582 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4583 CurrentLines->append( 4584 std::make_move_iterator(PreprocessorDirectives.begin()), 4585 std::make_move_iterator(PreprocessorDirectives.end())); 4586 PreprocessorDirectives.clear(); 4587 } 4588 // Disconnect the current token from the last token on the previous line. 4589 FormatTok->Previous = nullptr; 4590 } 4591 4592 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4593 4594 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4595 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4596 FormatTok.NewlinesBefore > 0; 4597 } 4598 4599 // Checks if \p FormatTok is a line comment that continues the line comment 4600 // section on \p Line. 
4601 static bool 4602 continuesLineCommentSection(const FormatToken &FormatTok, 4603 const UnwrappedLine &Line, 4604 const llvm::Regex &CommentPragmasRegex) { 4605 if (Line.Tokens.empty()) 4606 return false; 4607 4608 StringRef IndentContent = FormatTok.TokenText; 4609 if (FormatTok.TokenText.starts_with("//") || 4610 FormatTok.TokenText.starts_with("/*")) { 4611 IndentContent = FormatTok.TokenText.substr(2); 4612 } 4613 if (CommentPragmasRegex.match(IndentContent)) 4614 return false; 4615 4616 // If Line starts with a line comment, then FormatTok continues the comment 4617 // section if its original column is greater or equal to the original start 4618 // column of the line. 4619 // 4620 // Define the min column token of a line as follows: if a line ends in '{' or 4621 // contains a '{' followed by a line comment, then the min column token is 4622 // that '{'. Otherwise, the min column token of the line is the first token of 4623 // the line. 4624 // 4625 // If Line starts with a token other than a line comment, then FormatTok 4626 // continues the comment section if its original column is greater than the 4627 // original start column of the min column token of the line. 
4628 // 4629 // For example, the second line comment continues the first in these cases: 4630 // 4631 // // first line 4632 // // second line 4633 // 4634 // and: 4635 // 4636 // // first line 4637 // // second line 4638 // 4639 // and: 4640 // 4641 // int i; // first line 4642 // // second line 4643 // 4644 // and: 4645 // 4646 // do { // first line 4647 // // second line 4648 // int i; 4649 // } while (true); 4650 // 4651 // and: 4652 // 4653 // enum { 4654 // a, // first line 4655 // // second line 4656 // b 4657 // }; 4658 // 4659 // The second line comment doesn't continue the first in these cases: 4660 // 4661 // // first line 4662 // // second line 4663 // 4664 // and: 4665 // 4666 // int i; // first line 4667 // // second line 4668 // 4669 // and: 4670 // 4671 // do { // first line 4672 // // second line 4673 // int i; 4674 // } while (true); 4675 // 4676 // and: 4677 // 4678 // enum { 4679 // a, // first line 4680 // // second line 4681 // }; 4682 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4683 4684 // Scan for '{//'. If found, use the column of '{' as a min column for line 4685 // comment section continuation. 4686 const FormatToken *PreviousToken = nullptr; 4687 for (const UnwrappedLineNode &Node : Line.Tokens) { 4688 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4689 isLineComment(*Node.Tok)) { 4690 MinColumnToken = PreviousToken; 4691 break; 4692 } 4693 PreviousToken = Node.Tok; 4694 4695 // Grab the last newline preceding a token in this unwrapped line. 
4696 if (Node.Tok->NewlinesBefore > 0) 4697 MinColumnToken = Node.Tok; 4698 } 4699 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4700 MinColumnToken = PreviousToken; 4701 4702 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4703 MinColumnToken); 4704 } 4705 4706 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4707 bool JustComments = Line->Tokens.empty(); 4708 for (FormatToken *Tok : CommentsBeforeNextToken) { 4709 // Line comments that belong to the same line comment section are put on the 4710 // same line since later we might want to reflow content between them. 4711 // Additional fine-grained breaking of line comment sections is controlled 4712 // by the class BreakableLineCommentSection in case it is desirable to keep 4713 // several line comment sections in the same unwrapped line. 4714 // 4715 // FIXME: Consider putting separate line comment sections as children to the 4716 // unwrapped line instead. 4717 Tok->ContinuesLineCommentSection = 4718 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4719 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4720 addUnwrappedLine(); 4721 pushToken(Tok); 4722 } 4723 if (NewlineBeforeNext && JustComments) 4724 addUnwrappedLine(); 4725 CommentsBeforeNextToken.clear(); 4726 } 4727 4728 void UnwrappedLineParser::nextToken(int LevelDifference) { 4729 if (eof()) 4730 return; 4731 flushComments(isOnNewLine(*FormatTok)); 4732 pushToken(FormatTok); 4733 FormatToken *Previous = FormatTok; 4734 if (!Style.isJavaScript()) 4735 readToken(LevelDifference); 4736 else 4737 readTokenWithJavaScriptASI(); 4738 FormatTok->Previous = Previous; 4739 if (Style.isVerilog()) { 4740 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4741 // keywords like `begin`, we can't treat them the same as left braces 4742 // because some contexts require one of them. 
For example structs use 4743 // braces and if blocks use keywords, and a left brace can occur in an if 4744 // statement, but it is not a block. For keywords like `end`, we simply 4745 // treat them the same as right braces. 4746 if (Keywords.isVerilogEnd(*FormatTok)) 4747 FormatTok->Tok.setKind(tok::r_brace); 4748 } 4749 } 4750 4751 void UnwrappedLineParser::distributeComments( 4752 const SmallVectorImpl<FormatToken *> &Comments, 4753 const FormatToken *NextTok) { 4754 // Whether or not a line comment token continues a line is controlled by 4755 // the method continuesLineCommentSection, with the following caveat: 4756 // 4757 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4758 // that each comment line from the trail is aligned with the next token, if 4759 // the next token exists. If a trail exists, the beginning of the maximal 4760 // trail is marked as a start of a new comment section. 4761 // 4762 // For example in this code: 4763 // 4764 // int a; // line about a 4765 // // line 1 about b 4766 // // line 2 about b 4767 // int b; 4768 // 4769 // the two lines about b form a maximal trail, so there are two sections, the 4770 // first one consisting of the single comment "// line about a" and the 4771 // second one consisting of the next two comments. 4772 if (Comments.empty()) 4773 return; 4774 bool ShouldPushCommentsInCurrentLine = true; 4775 bool HasTrailAlignedWithNextToken = false; 4776 unsigned StartOfTrailAlignedWithNextToken = 0; 4777 if (NextTok) { 4778 // We are skipping the first element intentionally. 
4779 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4780 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4781 HasTrailAlignedWithNextToken = true; 4782 StartOfTrailAlignedWithNextToken = i; 4783 } 4784 } 4785 } 4786 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4787 FormatToken *FormatTok = Comments[i]; 4788 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4789 FormatTok->ContinuesLineCommentSection = false; 4790 } else { 4791 FormatTok->ContinuesLineCommentSection = 4792 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4793 } 4794 if (!FormatTok->ContinuesLineCommentSection && 4795 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4796 ShouldPushCommentsInCurrentLine = false; 4797 } 4798 if (ShouldPushCommentsInCurrentLine) 4799 pushToken(FormatTok); 4800 else 4801 CommentsBeforeNextToken.push_back(FormatTok); 4802 } 4803 } 4804 4805 void UnwrappedLineParser::readToken(int LevelDifference) { 4806 SmallVector<FormatToken *, 1> Comments; 4807 bool PreviousWasComment = false; 4808 bool FirstNonCommentOnLine = false; 4809 do { 4810 FormatTok = Tokens->getNextToken(); 4811 assert(FormatTok); 4812 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, 4813 TT_ConflictAlternative)) { 4814 if (FormatTok->is(TT_ConflictStart)) 4815 conditionalCompilationStart(/*Unreachable=*/false); 4816 else if (FormatTok->is(TT_ConflictAlternative)) 4817 conditionalCompilationAlternative(); 4818 else if (FormatTok->is(TT_ConflictEnd)) 4819 conditionalCompilationEnd(); 4820 FormatTok = Tokens->getNextToken(); 4821 FormatTok->MustBreakBefore = true; 4822 FormatTok->MustBreakBeforeFinalized = true; 4823 } 4824 4825 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4826 const FormatToken &Tok, 4827 bool PreviousWasComment) { 4828 auto IsFirstOnLine = [](const FormatToken &Tok) { 4829 return Tok.HasUnescapedNewline || Tok.IsFirst; 4830 }; 4831 4832 // Consider preprocessor directives preceded by 
block comments as first 4833 // on line. 4834 if (PreviousWasComment) 4835 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4836 return IsFirstOnLine(Tok); 4837 }; 4838 4839 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4840 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4841 PreviousWasComment = FormatTok->is(tok::comment); 4842 4843 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4844 (!Style.isVerilog() || 4845 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4846 FirstNonCommentOnLine) { 4847 distributeComments(Comments, FormatTok); 4848 Comments.clear(); 4849 // If there is an unfinished unwrapped line, we flush the preprocessor 4850 // directives only after that unwrapped line was finished later. 4851 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4852 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4853 assert((LevelDifference >= 0 || 4854 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4855 "LevelDifference makes Line->Level negative"); 4856 Line->Level += LevelDifference; 4857 // Comments stored before the preprocessor directive need to be output 4858 // before the preprocessor directive, at the same level as the 4859 // preprocessor directive, as we consider them to apply to the directive. 
4860 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4861 PPBranchLevel > 0) { 4862 Line->Level += PPBranchLevel; 4863 } 4864 assert(Line->Level >= Line->UnbracedBodyLevel); 4865 Line->Level -= Line->UnbracedBodyLevel; 4866 flushComments(isOnNewLine(*FormatTok)); 4867 parsePPDirective(); 4868 PreviousWasComment = FormatTok->is(tok::comment); 4869 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4870 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4871 } 4872 4873 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4874 !Line->InPPDirective) { 4875 continue; 4876 } 4877 4878 if (FormatTok->is(tok::identifier) && 4879 Macros.defined(FormatTok->TokenText) && 4880 // FIXME: Allow expanding macros in preprocessor directives. 4881 !Line->InPPDirective) { 4882 FormatToken *ID = FormatTok; 4883 unsigned Position = Tokens->getPosition(); 4884 4885 // To correctly parse the code, we need to replace the tokens of the macro 4886 // call with its expansion. 4887 auto PreCall = std::move(Line); 4888 Line.reset(new UnwrappedLine); 4889 bool OldInExpansion = InExpansion; 4890 InExpansion = true; 4891 // We parse the macro call into a new line. 4892 auto Args = parseMacroCall(); 4893 InExpansion = OldInExpansion; 4894 assert(Line->Tokens.front().Tok == ID); 4895 // And remember the unexpanded macro call tokens. 4896 auto UnexpandedLine = std::move(Line); 4897 // Reset to the old line. 
4898 Line = std::move(PreCall); 4899 4900 LLVM_DEBUG({ 4901 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4902 if (Args) { 4903 llvm::dbgs() << "("; 4904 for (const auto &Arg : Args.value()) 4905 for (const auto &T : Arg) 4906 llvm::dbgs() << T->TokenText << " "; 4907 llvm::dbgs() << ")"; 4908 } 4909 llvm::dbgs() << "\n"; 4910 }); 4911 if (Macros.objectLike(ID->TokenText) && Args && 4912 !Macros.hasArity(ID->TokenText, Args->size())) { 4913 // The macro is either 4914 // - object-like, but we got argumnets, or 4915 // - overloaded to be both object-like and function-like, but none of 4916 // the function-like arities match the number of arguments. 4917 // Thus, expand as object-like macro. 4918 LLVM_DEBUG(llvm::dbgs() 4919 << "Macro \"" << ID->TokenText 4920 << "\" not overloaded for arity " << Args->size() 4921 << "or not function-like, using object-like overload."); 4922 Args.reset(); 4923 UnexpandedLine->Tokens.resize(1); 4924 Tokens->setPosition(Position); 4925 nextToken(); 4926 assert(!Args && Macros.objectLike(ID->TokenText)); 4927 } 4928 if ((!Args && Macros.objectLike(ID->TokenText)) || 4929 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4930 // Next, we insert the expanded tokens in the token stream at the 4931 // current position, and continue parsing. 
4932 Unexpanded[ID] = std::move(UnexpandedLine); 4933 SmallVector<FormatToken *, 8> Expansion = 4934 Macros.expand(ID, std::move(Args)); 4935 if (!Expansion.empty()) 4936 FormatTok = Tokens->insertTokens(Expansion); 4937 4938 LLVM_DEBUG({ 4939 llvm::dbgs() << "Expanded: "; 4940 for (const auto &T : Expansion) 4941 llvm::dbgs() << T->TokenText << " "; 4942 llvm::dbgs() << "\n"; 4943 }); 4944 } else { 4945 LLVM_DEBUG({ 4946 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4947 << "\", because it was used "; 4948 if (Args) 4949 llvm::dbgs() << "with " << Args->size(); 4950 else 4951 llvm::dbgs() << "without"; 4952 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4953 }); 4954 Tokens->setPosition(Position); 4955 FormatTok = ID; 4956 } 4957 } 4958 4959 if (FormatTok->isNot(tok::comment)) { 4960 distributeComments(Comments, FormatTok); 4961 Comments.clear(); 4962 return; 4963 } 4964 4965 Comments.push_back(FormatTok); 4966 } while (!eof()); 4967 4968 distributeComments(Comments, nullptr); 4969 Comments.clear(); 4970 } 4971 4972 namespace { 4973 template <typename Iterator> 4974 void pushTokens(Iterator Begin, Iterator End, 4975 llvm::SmallVectorImpl<FormatToken *> &Into) { 4976 for (auto I = Begin; I != End; ++I) { 4977 Into.push_back(I->Tok); 4978 for (const auto &Child : I->Children) 4979 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4980 } 4981 } 4982 } // namespace 4983 4984 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4985 UnwrappedLineParser::parseMacroCall() { 4986 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4987 assert(Line->Tokens.empty()); 4988 nextToken(); 4989 if (FormatTok->isNot(tok::l_paren)) 4990 return Args; 4991 unsigned Position = Tokens->getPosition(); 4992 FormatToken *Tok = FormatTok; 4993 nextToken(); 4994 Args.emplace(); 4995 auto ArgStart = std::prev(Line->Tokens.end()); 4996 4997 int Parens = 0; 4998 do { 4999 switch 
(FormatTok->Tok.getKind()) { 5000 case tok::l_paren: 5001 ++Parens; 5002 nextToken(); 5003 break; 5004 case tok::r_paren: { 5005 if (Parens > 0) { 5006 --Parens; 5007 nextToken(); 5008 break; 5009 } 5010 Args->push_back({}); 5011 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5012 nextToken(); 5013 return Args; 5014 } 5015 case tok::comma: { 5016 if (Parens > 0) { 5017 nextToken(); 5018 break; 5019 } 5020 Args->push_back({}); 5021 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5022 nextToken(); 5023 ArgStart = std::prev(Line->Tokens.end()); 5024 break; 5025 } 5026 default: 5027 nextToken(); 5028 break; 5029 } 5030 } while (!eof()); 5031 Line->Tokens.resize(1); 5032 Tokens->setPosition(Position); 5033 FormatTok = Tok; 5034 return {}; 5035 } 5036 5037 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 5038 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 5039 if (MustBreakBeforeNextToken) { 5040 Line->Tokens.back().Tok->MustBreakBefore = true; 5041 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 5042 MustBreakBeforeNextToken = false; 5043 } 5044 } 5045 5046 } // end namespace format 5047 } // end namespace clang 5048