//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//

#include "UnwrappedLineParser.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "FormatTokenSource.h"
#include "Macros.h"
#include "TokenAnnotator.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <utility>

#define DEBUG_TYPE "format-parser"

namespace clang {
namespace format {

namespace {

/// Writes a debug representation of \p Line to \p OS.
///
/// The output starts with \p Prefix, the line's level, its first start column
/// and a " MACRO" marker if the line is in a preprocessor directive. It is
/// followed by one entry per token (token kind name, numeric token type,
/// original column and token text). Child lines of a token are printed
/// recursively, each on its own output line with a longer prefix.
///
/// \param PrintText Not read by this function.
void printLine(llvm::raw_ostream &OS, const UnwrappedLine &Line,
               StringRef Prefix = "", bool PrintText = false) {
  OS << Prefix << "Line(" << Line.Level << ", FSC=" << Line.FirstStartColumn
     << ")" << (Line.InPPDirective ? " MACRO" : "") << ": ";
  // True right after child lines were printed: the next token then needs the
  // prefix again, and no extra trailing newline is emitted at the end.
  bool NewLine = false;
  for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
                                                    E = Line.Tokens.end();
       I != E; ++I) {
    if (NewLine) {
      OS << Prefix;
      NewLine = false;
    }
    OS << I->Tok->Tok.getName() << "["
       << "T=" << (unsigned)I->Tok->getType()
       << ", OC=" << I->Tok->OriginalColumn << ", \"" << I->Tok->TokenText
       << "\"] ";
    for (SmallVectorImpl<UnwrappedLine>::const_iterator
             CI = I->Children.begin(),
             CE = I->Children.end();
         CI != CE; ++CI) {
      OS << "\n";
      printLine(OS, *CI, (Prefix + " ").str());
      NewLine = true;
    }
  }
  if (!NewLine)
    OS << "\n";
}

/// Prints \p Line to the LLVM debug stream. Only referenced from LLVM_DEBUG
/// statements, hence the "unused" attribute.
LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line) {
  printLine(llvm::dbgs(), Line);
}

/// Scoped helper that maintains the "must be declaration" state.
///
/// The constructor sets \c Line.MustBeDeclaration and pushes the same value
/// onto \c Stack. The destructor pops that value and restores
/// \c Line.MustBeDeclaration from the new top of the stack, or to \c true if
/// the stack has become empty.
class ScopedDeclarationState {
public:
  ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
                         bool MustBeDeclaration)
      : Line(Line), Stack(Stack) {
    Line.MustBeDeclaration = MustBeDeclaration;
    Stack.push_back(MustBeDeclaration);
  }
  ~ScopedDeclarationState() {
    Stack.pop_back();
    if (!Stack.empty())
      Line.MustBeDeclaration = Stack.back();
    else
      Line.MustBeDeclaration = true;
  }

private:
  UnwrappedLine &Line;
  llvm::BitVector &Stack;
};

} // end anonymous namespace

/// Streams the debug representation of \p Line (see printLine) to \p Stream.
std::ostream &operator<<(std::ostream &Stream, const UnwrappedLine &Line) {
  llvm::raw_os_ostream OS(Stream);
  printLine(OS, Line);
  return Stream;
}

/// Scoped helper that temporarily replaces the parser's current line.
///
/// The constructor stashes \c Parser.Line and installs a fresh UnwrappedLine
/// that inherits the level and preprocessor/macro flags of the stashed line.
/// New lines are redirected either to \c Parser.PreprocessorDirectives (if
/// \p SwitchToPreprocessorLines is set) or, if the stashed line has tokens, to
/// the children of its last token. The destructor emits any pending tokens as
/// an unwrapped line and restores the stashed line and the original line
/// container.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    else if (!Parser.Line->Tokens.empty())
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
    PreBlockLine = std::move(Parser.Line);
    Parser.Line = std::make_unique<UnwrappedLine>();
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->PPLevel = PreBlockLine->PPLevel;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
    Parser.Line->InMacroBody = PreBlockLine->InMacroBody;
    Parser.Line->UnbracedBodyLevel = PreBlockLine->UnbracedBodyLevel;
  }

  ~ScopedLineState() {
    // Flush whatever was collected on the temporary line before dropping it.
    if (!Parser.Line->Tokens.empty())
      Parser.addUnwrappedLine();
    assert(Parser.Line->Tokens.empty());
    Parser.Line = std::move(PreBlockLine);
    // Leaving the preprocessor-directive lines: force a break before the next
    // token.
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
      Parser.MustBreakBeforeNextToken = true;
    Parser.CurrentLines = OriginalLines;
  }

private:
  UnwrappedLineParser &Parser;

  // The line that was current when this object was constructed.
  std::unique_ptr<UnwrappedLine> PreBlockLine;
  // The value of Parser.CurrentLines at construction time.
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
};

/// Scoped helper for the brace of a compound statement.
///
/// On construction it optionally emits the current unwrapped line (brace
/// wrapping) and optionally increments the referenced line level (brace
/// indentation). On destruction the line level is reset to the value it had
/// at construction.
class CompoundStatementIndenter {
public:
  /// Takes the wrap/indent decisions from \c Style.BraceWrapping
  /// (\c AfterControlStatement and \c IndentBraces).
  CompoundStatementIndenter(UnwrappedLineParser *Parser,
                            const FormatStyle &Style, unsigned &LineLevel)
      : CompoundStatementIndenter(Parser, LineLevel,
                                  Style.BraceWrapping.AfterControlStatement,
                                  Style.BraceWrapping.IndentBraces) {}
  CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
                            bool WrapBrace, bool IndentBrace)
      : LineLevel(LineLevel), OldLineLevel(LineLevel) {
    if (WrapBrace)
      Parser->addUnwrappedLine();
    if (IndentBrace)
      ++LineLevel;
  }
  ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }

private:
  unsigned &LineLevel;
  unsigned OldLineLevel;
};

/// Constructs a parser over \p Tokens that reports lines to \p Callback.
///
/// Include-guard detection starts out rejected when
/// \c Style.IndentPPDirectives is \c PPDIS_None and initialized otherwise.
UnwrappedLineParser::UnwrappedLineParser(
    SourceManager &SourceMgr, const FormatStyle &Style,
    const AdditionalKeywords &Keywords, unsigned FirstStartColumn,
    ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback,
    llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
    IdentifierTable &IdentTable)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), IsCpp(Style.isCpp()),
      LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn),
      Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {
  assert(IsCpp == LangOpts.CXXOperatorNames);
}

/// Resets the per-run parser state so that parse() can start another run over
/// the same tokens. PPLevelBranchIndex and PPLevelBranchCount are not touched
/// here; parse() updates them between runs.
void UnwrappedLineParser::reset() {
  PPBranchLevel = -1;
  IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
                     ? IG_Rejected
                     : IG_Inited;
  IncludeGuardToken = nullptr;
  Line.reset(new UnwrappedLine);
  CommentsBeforeNextToken.clear();
  FormatTok = nullptr;
  MustBreakBeforeNextToken = false;
  IsDecltypeAutoFunction = false;
  PreprocessorDirectives.clear();
  CurrentLines = &Lines;
  DeclarationScopeStack.clear();
  NestedTooDeep.clear();
  NestedLambdas.clear();
  PPStack.clear();
  Line->FirstStartColumn = FirstStartColumn;

  // Only if the previous run recorded unexpanded macro calls do the tokens
  // carry macro context that has to be cleared.
  if (!Unexpanded.empty())
    for (FormatToken *Token : AllTokens)
      Token->MacroCtx.reset();
  CurrentExpandedLines.clear();
  ExpandedLines.clear();
  Unexpanded.clear();
  InExpansion = false;
  Reconstruct.reset();
}

/// Parses all tokens and hands the resulting unwrapped lines to the callback.
///
/// The token stream is parsed repeatedly: after each run the branch indices in
/// PPLevelBranchIndex are advanced, and the loop ends once that vector is
/// empty. Each run ends with Callback.finishRun(); if macro expansions were
/// recorded, an additional pass with the expanded lines is delivered first.
void UnwrappedLineParser::parse() {
  IndexedTokenSource TokenSource(AllTokens);
  Line->FirstStartColumn = FirstStartColumn;
  do {
    LLVM_DEBUG(llvm::dbgs() << "----\n");
    reset();
    Tokens = &TokenSource;
    TokenSource.reset();

    readToken();
    parseFile();

    // If we found an include guard then all preprocessor directives (other than
    // the guard) are over-indented by one.
    if (IncludeGuard == IG_Found) {
      for (auto &Line : Lines)
        if (Line.InPPDirective && Line.Level > 0)
          --Line.Level;
    }

    // Create line with eof token.
    assert(eof());
    pushToken(FormatTok);
    addUnwrappedLine();

    // In a first run, format everything with the lines containing macro calls
    // replaced by the expansion.
    if (!ExpandedLines.empty()) {
      LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n");
      for (const auto &Line : Lines) {
        if (!Line.Tokens.empty()) {
          // ExpandedLines is keyed by the first token of the line.
          auto it = ExpandedLines.find(Line.Tokens.begin()->Tok);
          if (it != ExpandedLines.end()) {
            for (const auto &Expanded : it->second) {
              LLVM_DEBUG(printDebugInfo(Expanded));
              Callback.consumeUnwrappedLine(Expanded);
            }
            continue;
          }
        }
        LLVM_DEBUG(printDebugInfo(Line));
        Callback.consumeUnwrappedLine(Line);
      }
      Callback.finishRun();
    }

    LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n");
    for (const UnwrappedLine &Line : Lines) {
      LLVM_DEBUG(printDebugInfo(Line));
      Callback.consumeUnwrappedLine(Line);
    }
    Callback.finishRun();
    Lines.clear();
    // Drop the innermost levels whose branch index has reached the last
    // recorded branch, then advance the index of the innermost remaining
    // level for the next run.
    while (!PPLevelBranchIndex.empty() &&
           PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
      PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
      PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
    }
    if (!PPLevelBranchIndex.empty()) {
      ++PPLevelBranchIndex.back();
      assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
      assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
    }
  } while (!PPLevelBranchIndex.empty());
}

/// Parses the top level of the token stream: a braced list for text protos,
/// a regular level otherwise. Remaining comments and tokens are flushed into
/// unwrapped lines afterwards.
void UnwrappedLineParser::parseFile() {
  // The top-level context in a file always has declarations, except for pre-
  // processor directives and JavaScript files.
  bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (Style.Language == FormatStyle::LK_TextProto)
    parseBracedList();
  else
    parseLevel();
  // Make sure to format the remaining tokens.
  //
  // LK_TextProto is special since its top-level is parsed as the body of a
  // braced list, which does not necessarily have natural line separators such
  // as a semicolon. Comments after the last entry that have been determined to
  // not belong to that line, as in:
  //   key: value
  //   // endfile comment
  // do not have a chance to be put on a line of their own until this point.
  // Here we add this newline before end-of-file comments.
  if (Style.Language == FormatStyle::LK_TextProto &&
      !CommentsBeforeNextToken.empty()) {
    addUnwrappedLine();
  }
  flushComments(true);
  addUnwrappedLine();
}

/// Consumes tokens up to (not including) the next '{' or the end of the file.
/// Each `where` keyword starts a new unwrapped line and is handled by a
/// recursive call.
void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      return;
    default:
      if (FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
      break;
    }
  } while (!eof());
}

/// Consumes a C# attribute up to and including the ']' that balances one
/// already-open '[' (the counter starts at 1; parseLevel consumes the opening
/// '[' before calling this), then ends the unwrapped line. Stops at the end
/// of the file if the brackets never balance.
void UnwrappedLineParser::parseCSharpAttribute() {
  int UnpairedSquareBrackets = 1;
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_square:
      nextToken();
      --UnpairedSquareBrackets;
      if (UnpairedSquareBrackets == 0) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::l_square:
      ++UnpairedSquareBrackets;
      nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// Returns true if the last emitted line is a preprocessor directive, or if
/// the previous token is a comment that is multiline or starts on a new line.
bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
  if (!Lines.empty() && Lines.back().InPPDirective)
    return true;

  const FormatToken *Previous = Tokens->getPreviousToken();
  return Previous && Previous->is(tok::comment) &&
         (Previous->IsMultiline || Previous->NewlinesBefore > 0);
}

/// \brief Parses a level, that is, a sequence of structural elements
/// (statements, declarations, nested blocks and comments) up to the right
/// brace that closes \p OpeningBrace or, failing that, the end of the file.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  const bool InRequiresExpression =
      OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace);
  // Evaluated once on entry; always true when brace removal is disabled, which
  // makes the "simple block" check below fail.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Skip an attribute and its optional parenthesized argument list.
    if (FormatTok->isAttribute()) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind Kind = FormatTok->Tok.getKind();
    if (FormatTok->is(TT_MacroBlockBegin))
      Kind = tok::l_brace;
    else if (FormatTok->is(TT_MacroBlockEnd))
      Kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. Once
    // HasDoWhile/HasLabel have been set, nullptr is passed instead so that a
    // later element cannot reset them.
    auto ParseDefault = [this, OpeningBrace, IfKind, &IfLBrace, &HasDoWhile,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(OpeningBrace, IfKind, &IfLBrace,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (Kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (InRequiresExpression) {
        FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (!InRequiresExpression && FormatTok->isNot(TT_MacroBlockBegin)) {
        if (tryToParseBracedList())
          continue;
        FormatTok->setFinalizedType(TT_BlockLBrace);
      }
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // This brace ends the level. The remaining checks only decide whether
        // the level counts as a "simple block" (see \returns above); the
        // closing brace itself is left for the caller to consume.
        if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // FormatTok may be a TT_MacroBlockEnd token mapped to r_brace above,
        // hence the explicit isNot(tok::r_brace) check.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line right after the closing brace also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // Unmatched closing brace at a level without an opening brace: consume
      // it and keep going.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek at the next non-comment token, then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      auto *Next = Tokens->getNextNonComment();
      FormatTok = Tokens->setPosition(StoredPosition);
      if (!Next->isOneOf(tok::colon, tok::arrow)) {
        // default not followed by `:` or `->` is not a case label; treat it
        // like an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      [[fallthrough]];
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_Proto || Style.isVerilog() ||
          (Style.isJavaScript() && Line->MustBeDeclaration)) {
        // Proto: there are no switch/case statements
        // Verilog: Case labels don't have this word. We handle case
        // labels including default in TokenAnnotator.
        // JavaScript: A 'case: string' style field declaration.
        ParseDefault();
        break;
      }
      // Only the first label of the level may bump the indentation level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (OpeningBrace && OpeningBrace->is(TT_SwitchExpressionLBrace)) ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      [[fallthrough]];
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}

/// Scans forward from the current '{' and assigns a block kind (block vs.
/// braced init list) to the braces it encounters, until the brace stack is
/// empty or the end of the file is reached. The token position is restored
/// before returning.
///
/// \param ExpectClassBody If true, a ';' after the closing brace of the
/// outermost brace pair does not by itself indicate a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  struct StackEntry {
    FormatToken *Tok;
    const FormatToken *PrevTok;
  };
  SmallVector<StackEntry, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));

  do {
    auto *NextTok = Tokens->getNextNonComment();

    if (!Line->InMacroBody && !Style.isTableGen()) {
      // Skip PPDirective lines and comments.
      while (NextTok->is(tok::hash)) {
        NextTok = Tokens->getNextToken();
        if (NextTok->is(tok::pp_not_keyword))
          break;
        // Advance to the first token that starts a new line (or eof).
        do {
          NextTok = Tokens->getNextToken();
        } while (NextTok->NewlinesBefore == 0 && NextTok->isNot(tok::eof));

        while (NextTok->is(tok::comment))
          NextTok = Tokens->getNextToken();
      }
    }

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back({Tok, PrevTok});
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide the kind if the matching '{' is still undecided.
      if (auto *LBrace = LBraceStack.back().Tok; LBrace->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else if (LBrace->isNot(TT_EnumLBrace)) {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBrace->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList =
              ProbablyBracedList || (IsCpp && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::ellipsis);

          // Distinguish between braced list in a constructor initializer list
          // followed by constructor body, or just adjacent blocks.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok &&
               LBraceStack.back().PrevTok->isOneOf(tok::identifier,
                                                   tok::greater));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }

          // Cpp macro definition body that is a nonempty braced list or block:
          if (IsCpp && Line->InMacroBody && PrevTok != FormatTok &&
              !FormatTok->Previous && NextTok->is(tok::eof) &&
              // A statement can end with only `;` (simple statement), a block
              // closing brace (compound statement), or `:` (label statement).
              // If PrevTok is a block opening brace, Tok ends an empty block.
              !PrevTok->isOneOf(tok::semi, BK_Block, tok::colon)) {
            ProbablyBracedList = true;
          }
        }
        // Both braces of the pair get the same kind.
        const auto BlockKind = ProbablyBracedList ? BK_BracedInit : BK_Block;
        Tok->setBlockKind(BlockKind);
        LBrace->setBlockKind(BlockKind);
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      if (Tok->isNot(TT_StatementMacro))
        break;
      [[fallthrough]];
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-undecided brace pair marks the
      // innermost open brace as a block.
      if (!LBraceStack.empty() && LBraceStack.back().Tok->is(BK_Unknown))
        LBraceStack.back().Tok->setBlockKind(BK_Block);
      break;
    default:
      break;
    }

    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (const auto &Entry : LBraceStack)
    if (Entry.Tok->is(BK_Unknown))
      Entry.Tok->setBlockKind(BK_Block);

  FormatTok = Tokens->setPosition(StoredPosition);
}

// Sets the token type of the directly previous right brace.
// If the previous non-comment token is not a right brace, nothing is changed.
void UnwrappedLineParser::setPreviousRBraceType(TokenType Type) {
  if (auto Prev = FormatTok->getPreviousNonComment();
      Prev && Prev->is(tok::r_brace)) {
    Prev->setFinalizedType(Type);
  }
}

// Mixes the std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  std::hash<T> hasher;
  seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Returns a hash over the kind and line of every entry in PPStack, in stack
// order. parseBlock compares the values taken before and after a block.
size_t UnwrappedLineParser::computePPHash() const {
  size_t h = 0;
  for (const auto &i : PPStack) {
    hash_combine(h, size_t(i.Kind));
    hash_combine(h, i.Line);
  }
  return h;
}

// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
bool UnwrappedLineParser::mightFitOnOneLine(
    UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
  const auto ColumnLimit = Style.ColumnLimit;
  // No column limit: everything fits.
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());

  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  // Snapshot every token (and its children) so the annotator's modifications
  // can be undone below. The copies are freed in the restore loop.
  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    // NOTE(review): Token is a const reference here, so std::move yields a
    // const rvalue; this presumably selects copy assignment rather than a
    // move - confirm against the type of Children.
    SavedToken.Children = std::move(Token.Children);
  }

  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  auto Length = LastToken->TotalLength;
  if (OpeningBrace) {
    assert(OpeningBrace != Tokens.front().Tok);
    // If the annotator added exactly ColumnLimit to the total length at the
    // opening brace (compared to its predecessor), take that amount out again.
    if (auto Prev = OpeningBrace->Previous;
        Prev && Prev->TotalLength + ColumnLimit == OpeningBrace->TotalLength) {
      Length -= ColumnLimit;
    }
    Length -= OpeningBrace->TokenText.size() + 1;
  }

  // A leading right brace (plus the following space) is not counted either.
  if (const auto *FirstToken = Line.First; FirstToken->is(tok::r_brace)) {
    assert(!OpeningBrace || OpeningBrace->is(TT_ControlStatementLBrace));
    Length -= FirstToken->TokenText.size() + 1;
  }

  // Restore the original tokens and release the snapshot copies.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  // If these change, PPLevel needs to be used to get the correct indentation.
  assert(!Line.InMacroBody);
  assert(!Line.InPPDirective);
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}

/// Parses a block that starts at the current token: a '{', a macro block
/// begin token or, for Verilog, a `begin`-like or hierarchy token.
///
/// \param MustBeDeclaration Declaration state installed for the block body.
/// \param AddLevels Number of levels added to the line level for the body.
/// \param MunchSemi If true, a ';' directly after the block is consumed.
/// \param KeepBraces Consulted by the brace-removal check (RemoveBraces).
/// \param IfKind Forwarded to parseLevel().
/// \param UnindentWhitesmithsBraces If true, the line level is decremented
/// after the opening line has been emitted.
/// \returns The \c if left brace reported by parseLevel(), or \c nullptr
/// (also when the nesting is more than 300 levels deep).
FormatToken *UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                             unsigned AddLevels, bool MunchSemi,
                                             bool KeepBraces,
                                             IfStmtKind *IfKind,
                                             bool UnindentWhitesmithsBraces) {
  auto HandleVerilogBlockLabel = [this]() {
    // ":" name
    if (Style.isVerilog() && FormatTok->is(tok::colon)) {
      nextToken();
      if (Keywords.isVerilogIdentifier(*FormatTok))
        nextToken();
    }
  };

  // Whether this is a Verilog-specific block that has a special header like a
  // module.
  const bool VerilogHierarchy =
      Style.isVerilog() && Keywords.isVerilogHierarchy(*FormatTok);
  assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
          (Style.isVerilog() &&
           (Keywords.isVerilogBegin(*FormatTok) || VerilogHierarchy))) &&
         "'{' or macro block token expected");
  // The opening token of the block.
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Index that the line containing the opening token will get in CurrentLines.
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (!VerilogHierarchy && AddLevels > 0 &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
    ++Line->Level;
  }

  size_t PPStartHash = computePPHash();

  const unsigned InitialLevel = Line->Level;
  if (VerilogHierarchy) {
    AddLevels += parseVerilogHierarchyHeader();
  } else {
    nextToken(/*LevelDifference=*/AddLevels);
    HandleVerilogBlockLabel();
  }

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      !parsingPPDirective() ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock = parseLevel(Tok, IfKind, &IfLBrace);

  if (eof())
    return IfLBrace;

  // The level ended on something other than the expected closing token.
  if (MacroBlock ? FormatTok->isNot(TT_MacroBlockEnd)
                 : FormatTok->isNot(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  if (FormatTok->is(tok::r_brace)) {
    FormatTok->setBlockKind(BK_Block);
    if (Tok->is(TT_NamespaceLBrace))
      FormatTok->setFinalizedType(TT_NamespaceRBrace);
  }

  const bool IsFunctionRBrace =
      FormatTok->is(tok::r_brace) && Tok->is(TT_FunctionLBrace);

  // Decides whether the braces of this block may be removed. The lambda
  // captures by copy and is mutable, so its changes to Index and Tok do not
  // affect the variables of the enclosing function.
  auto RemoveBraces = [=]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    // The opening brace has no previous token, i.e. it is first on its line.
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    auto &LastLine = CurrentLines->back();
    if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  // Removable braces are linked to each other through MatchingParen.
  // NOTE(review): presumably consumed by a later brace-removal pass - confirm.
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // When this is a function block and there is an unnecessary semicolon
  // afterwards then mark it as optional (so the RemoveSemi pass can get rid of
  // it later).
  if (Style.RemoveSemicolon && IsFunctionRBrace) {
    while (FormatTok->is(tok::semi)) {
      FormatTok->Optional = true;
      nextToken();
    }
  }

  HandleVerilogBlockLabel();

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  Line->Level = InitialLevel;

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  // Only link the opening and closing lines if the preprocessor stack is the
  // same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}

// Returns true if \p Line starts with the tokens `goog . scope (`.
static bool isGoogScope(const UnwrappedLine &Line) {
  // FIXME: Closure-library specific stuff should not be hard-coded but be
  // configurable.
  if (Line.Tokens.size() < 4)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->TokenText != "goog")
    return false;
  ++I;
  if (I->Tok->isNot(tok::period))
    return false;
  ++I;
  if (I->Tok->TokenText != "scope")
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

// Returns true if \p Line starts with the tokens `( function (`.
static bool isIIFE(const UnwrappedLine &Line,
                   const AdditionalKeywords &Keywords) {
  // Look for the start of an immediately invoked anonymous function.
  // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
  // This is commonly done in JavaScript to create a new, anonymous scope.
  // Example: (function() { ... })()
  if (Line.Tokens.size() < 3)
    return false;
  auto I = Line.Tokens.begin();
  if (I->Tok->isNot(tok::l_paren))
    return false;
  ++I;
  if (I->Tok->isNot(Keywords.kw_function))
    return false;
  ++I;
  return I->Tok->is(tok::l_paren);
}

// Returns the Style.BraceWrapping setting that applies to the record-like
// construct introduced by \p InitialToken (namespace, class, union, struct or
// enum); a namespace macro is treated like `namespace`. Returns false for any
// other token kind.
static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
                                   const FormatToken &InitialToken) {
  tok::TokenKind Kind = InitialToken.Tok.getKind();
  if (InitialToken.is(TT_NamespaceMacro))
    Kind = tok::kw_namespace;

  switch (Kind) {
  case tok::kw_namespace:
    return Style.BraceWrapping.AfterNamespace;
  case tok::kw_class:
    return Style.BraceWrapping.AfterClass;
  case tok::kw_union:
    return Style.BraceWrapping.AfterUnion;
  case tok::kw_struct:
    return Style.BraceWrapping.AfterStruct;
  case tok::kw_enum:
    return Style.BraceWrapping.AfterEnum;
  default:
    return false;
  }
}

// Parses a '{ ... }' block whose lines become children of the current line
// (see ScopedLineState). For JavaScript `goog.scope(` and IIFE lines the
// child lines are not indented by an extra level.
void UnwrappedLineParser::parseChildBlock() {
  assert(FormatTok->is(tok::l_brace));
  FormatTok->setBlockKind(BK_Block);
  const FormatToken *OpeningBrace = FormatTok;
  nextToken();
  {
    bool SkipIndent = (Style.isJavaScript() &&
                       (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
    ScopedLineState LineState(*this);
    ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                            /*MustBeDeclaration=*/false);
    Line->Level += SkipIndent ? 0 : 1;
    parseLevel(OpeningBrace);
    flushComments(isOnNewLine(*FormatTok));
    Line->Level -= SkipIndent ? 0 : 1;
  }
  // Consume the token the level stopped at (the closing brace, if present).
  nextToken();
}

// Parses a preprocessor directive starting at '#' and dispatches on the
// directive keyword. A token without identifier info after '#' is handled as
// an unknown directive.
void UnwrappedLineParser::parsePPDirective() {
  assert(FormatTok->is(tok::hash) && "'#' expected");
  ScopedMacroState MacroState(*Line, Tokens, FormatTok);

  nextToken();

  if (!FormatTok->Tok.getIdentifierInfo()) {
    parsePPUnknown();
    return;
  }

  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_define:
    parsePPDefine();
    return;
  case tok::pp_if:
    parsePPIf(/*IfDef=*/false);
    break;
  case tok::pp_ifdef:
  case tok::pp_ifndef:
    parsePPIf(/*IfDef=*/true);
    break;
  case tok::pp_else:
  case tok::pp_elifdef:
  case tok::pp_elifndef:
  case tok::pp_elif:
    parsePPElse();
    break;
  case tok::pp_endif:
    parsePPEndIf();
    break;
  case tok::pp_pragma:
    parsePPPragma();
    break;
  default:
    parsePPUnknown();
    break;
  }
}

// Pushes a new entry onto PPStack, recording the current line index. The
// entry is PP_Unreachable if \p Unreachable is set or the enclosing entry is
// already unreachable, and PP_Conditional otherwise.
void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
  size_t Line = CurrentLines->size();
  // While inside a directive, CurrentLines holds only the directive lines;
  // add the number of regular lines to get a global index.
  if (CurrentLines == &PreprocessorDirectives)
    Line += Lines.size();

  if (Unreachable ||
      (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
    PPStack.push_back({PP_Unreachable, Line});
  } else {
    PPStack.push_back({PP_Conditional, Line});
  }
}

// Enters a new conditional-compilation level (#if/#ifdef/#ifndef). The first
// branch is treated as unreachable if \p Unreachable is set or if a later
// branch is selected for this level in the current run.
void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
  ++PPBranchLevel;
  assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
  // First time this nesting level is seen: start with branch 0 and no known
  // branch count.
  if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
    PPLevelBranchIndex.push_back(0);
    PPLevelBranchCount.push_back(0);
  }
  PPChainBranchIndex.push(Unreachable ? -1 : 0);
  bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
  conditionalCompilationCondition(Unreachable || Skip);
}

// Switches to the next branch of the current conditional chain
// (#else/#elif...). The new branch is unreachable unless its index equals the
// one selected for this level.
void UnwrappedLineParser::conditionalCompilationAlternative() {
  if (!PPStack.empty())
    PPStack.pop_back();
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (!PPChainBranchIndex.empty())
    ++PPChainBranchIndex.top();
  conditionalCompilationCondition(
      PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
      PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
}

// Leaves the current conditional-compilation level (#endif), recording the
// largest number of branches seen so far for that level.
void UnwrappedLineParser::conditionalCompilationEnd() {
  assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
  if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
    if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
      PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
  }
  // Guard against #endif's without #if.
  if (PPBranchLevel > -1)
    --PPBranchLevel;
  if (!PPChainBranchIndex.empty())
    PPChainBranchIndex.pop();
  if (!PPStack.empty())
    PPStack.pop_back();
}

void UnwrappedLineParser::parsePPIf(bool IfDef) {
  bool IfNDef = FormatTok->is(tok::pp_ifndef);
  nextToken();
  bool Unreachable = false;
  if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
    Unreachable = true;
  if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
    Unreachable = true;
  conditionalCompilationStart(Unreachable);
  FormatToken *IfCondition = FormatTok;
  // If there's a #ifndef on the first line, and the only lines before it are
  // comments, it could be an include guard.
1102 bool MaybeIncludeGuard = IfNDef; 1103 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1104 for (auto &Line : Lines) { 1105 if (Line.Tokens.front().Tok->isNot(tok::comment)) { 1106 MaybeIncludeGuard = false; 1107 IncludeGuard = IG_Rejected; 1108 break; 1109 } 1110 } 1111 } 1112 --PPBranchLevel; 1113 parsePPUnknown(); 1114 ++PPBranchLevel; 1115 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { 1116 IncludeGuard = IG_IfNdefed; 1117 IncludeGuardToken = IfCondition; 1118 } 1119 } 1120 1121 void UnwrappedLineParser::parsePPElse() { 1122 // If a potential include guard has an #else, it's not an include guard. 1123 if (IncludeGuard == IG_Defined && PPBranchLevel == 0) 1124 IncludeGuard = IG_Rejected; 1125 // Don't crash when there is an #else without an #if. 1126 assert(PPBranchLevel >= -1); 1127 if (PPBranchLevel == -1) 1128 conditionalCompilationStart(/*Unreachable=*/true); 1129 conditionalCompilationAlternative(); 1130 --PPBranchLevel; 1131 parsePPUnknown(); 1132 ++PPBranchLevel; 1133 } 1134 1135 void UnwrappedLineParser::parsePPEndIf() { 1136 conditionalCompilationEnd(); 1137 parsePPUnknown(); 1138 // If the #endif of a potential include guard is the last thing in the file, 1139 // then we found an include guard. 
1140 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() && 1141 Style.IndentPPDirectives != FormatStyle::PPDIS_None) { 1142 IncludeGuard = IG_Found; 1143 } 1144 } 1145 1146 void UnwrappedLineParser::parsePPDefine() { 1147 nextToken(); 1148 1149 if (!FormatTok->Tok.getIdentifierInfo()) { 1150 IncludeGuard = IG_Rejected; 1151 IncludeGuardToken = nullptr; 1152 parsePPUnknown(); 1153 return; 1154 } 1155 1156 if (IncludeGuard == IG_IfNdefed && 1157 IncludeGuardToken->TokenText == FormatTok->TokenText) { 1158 IncludeGuard = IG_Defined; 1159 IncludeGuardToken = nullptr; 1160 for (auto &Line : Lines) { 1161 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { 1162 IncludeGuard = IG_Rejected; 1163 break; 1164 } 1165 } 1166 } 1167 1168 // In the context of a define, even keywords should be treated as normal 1169 // identifiers. Setting the kind to identifier is not enough, because we need 1170 // to treat additional keywords like __except as well, which are already 1171 // identifiers. Setting the identifier info to null interferes with include 1172 // guard processing above, and changes preprocessing nesting. 1173 FormatTok->Tok.setKind(tok::identifier); 1174 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define); 1175 nextToken(); 1176 if (FormatTok->Tok.getKind() == tok::l_paren && 1177 !FormatTok->hasWhitespaceBefore()) { 1178 parseParens(); 1179 } 1180 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1181 Line->Level += PPBranchLevel + 1; 1182 addUnwrappedLine(); 1183 ++Line->Level; 1184 1185 Line->PPLevel = PPBranchLevel + (IncludeGuard == IG_Defined ? 
0 : 1); 1186 assert((int)Line->PPLevel >= 0); 1187 Line->InMacroBody = true; 1188 1189 if (Style.SkipMacroDefinitionBody) { 1190 while (!eof()) { 1191 FormatTok->Finalized = true; 1192 FormatTok = Tokens->getNextToken(); 1193 } 1194 addUnwrappedLine(); 1195 return; 1196 } 1197 1198 // Errors during a preprocessor directive can only affect the layout of the 1199 // preprocessor directive, and thus we ignore them. An alternative approach 1200 // would be to use the same approach we use on the file level (no 1201 // re-indentation if there was a structural error) within the macro 1202 // definition. 1203 parseFile(); 1204 } 1205 1206 void UnwrappedLineParser::parsePPPragma() { 1207 Line->InPragmaDirective = true; 1208 parsePPUnknown(); 1209 } 1210 1211 void UnwrappedLineParser::parsePPUnknown() { 1212 do { 1213 nextToken(); 1214 } while (!eof()); 1215 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None) 1216 Line->Level += PPBranchLevel + 1; 1217 addUnwrappedLine(); 1218 } 1219 1220 // Here we exclude certain tokens that are not usually the first token in an 1221 // unwrapped line. This is used in attempt to distinguish macro calls without 1222 // trailing semicolons from other constructs split to several lines. 1223 static bool tokenCanStartNewLine(const FormatToken &Tok) { 1224 // Semicolon can be a null-statement, l_square can be a start of a macro or 1225 // a C++11 attribute, but this doesn't seem to be common. 1226 return !Tok.isOneOf(tok::semi, tok::l_brace, 1227 // Tokens that can only be used as binary operators and a 1228 // part of overloaded operator names. 
1229 tok::period, tok::periodstar, tok::arrow, tok::arrowstar, 1230 tok::less, tok::greater, tok::slash, tok::percent, 1231 tok::lessless, tok::greatergreater, tok::equal, 1232 tok::plusequal, tok::minusequal, tok::starequal, 1233 tok::slashequal, tok::percentequal, tok::ampequal, 1234 tok::pipeequal, tok::caretequal, tok::greatergreaterequal, 1235 tok::lesslessequal, 1236 // Colon is used in labels, base class lists, initializer 1237 // lists, range-based for loops, ternary operator, but 1238 // should never be the first token in an unwrapped line. 1239 tok::colon, 1240 // 'noexcept' is a trailing annotation. 1241 tok::kw_noexcept); 1242 } 1243 1244 static bool mustBeJSIdent(const AdditionalKeywords &Keywords, 1245 const FormatToken *FormatTok) { 1246 // FIXME: This returns true for C/C++ keywords like 'struct'. 1247 return FormatTok->is(tok::identifier) && 1248 (!FormatTok->Tok.getIdentifierInfo() || 1249 !FormatTok->isOneOf( 1250 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, 1251 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, 1252 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, 1253 Keywords.kw_let, Keywords.kw_var, tok::kw_const, 1254 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, 1255 Keywords.kw_instanceof, Keywords.kw_interface, 1256 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from)); 1257 } 1258 1259 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, 1260 const FormatToken *FormatTok) { 1261 return FormatTok->Tok.isLiteral() || 1262 FormatTok->isOneOf(tok::kw_true, tok::kw_false) || 1263 mustBeJSIdent(Keywords, FormatTok); 1264 } 1265 1266 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement 1267 // when encountered after a value (see mustBeJSIdentOrValue). 
1268 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, 1269 const FormatToken *FormatTok) { 1270 return FormatTok->isOneOf( 1271 tok::kw_return, Keywords.kw_yield, 1272 // conditionals 1273 tok::kw_if, tok::kw_else, 1274 // loops 1275 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, 1276 // switch/case 1277 tok::kw_switch, tok::kw_case, 1278 // exceptions 1279 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, 1280 // declaration 1281 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, 1282 Keywords.kw_async, Keywords.kw_function, 1283 // import/export 1284 Keywords.kw_import, tok::kw_export); 1285 } 1286 1287 // Checks whether a token is a type in K&R C (aka C78). 1288 static bool isC78Type(const FormatToken &Tok) { 1289 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long, 1290 tok::kw_unsigned, tok::kw_float, tok::kw_double, 1291 tok::identifier); 1292 } 1293 1294 // This function checks whether a token starts the first parameter declaration 1295 // in a K&R C (aka C78) function definition, e.g.: 1296 // int f(a, b) 1297 // short a, b; 1298 // { 1299 // return a + b; 1300 // } 1301 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next, 1302 const FormatToken *FuncName) { 1303 assert(Tok); 1304 assert(Next); 1305 assert(FuncName); 1306 1307 if (FuncName->isNot(tok::identifier)) 1308 return false; 1309 1310 const FormatToken *Prev = FuncName->Previous; 1311 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev))) 1312 return false; 1313 1314 if (!isC78Type(*Tok) && 1315 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) { 1316 return false; 1317 } 1318 1319 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo()) 1320 return false; 1321 1322 Tok = Tok->Previous; 1323 if (!Tok || Tok->isNot(tok::r_paren)) 1324 return false; 1325 1326 Tok = Tok->Previous; 1327 if (!Tok || Tok->isNot(tok::identifier)) 1328 return false; 1329 1330 return 
Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma); 1331 } 1332 1333 bool UnwrappedLineParser::parseModuleImport() { 1334 assert(FormatTok->is(Keywords.kw_import) && "'import' expected"); 1335 1336 if (auto Token = Tokens->peekNextToken(/*SkipComment=*/true); 1337 !Token->Tok.getIdentifierInfo() && 1338 !Token->isOneOf(tok::colon, tok::less, tok::string_literal)) { 1339 return false; 1340 } 1341 1342 nextToken(); 1343 while (!eof()) { 1344 if (FormatTok->is(tok::colon)) { 1345 FormatTok->setFinalizedType(TT_ModulePartitionColon); 1346 } 1347 // Handle import <foo/bar.h> as we would an include statement. 1348 else if (FormatTok->is(tok::less)) { 1349 nextToken(); 1350 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) { 1351 // Mark tokens up to the trailing line comments as implicit string 1352 // literals. 1353 if (FormatTok->isNot(tok::comment) && 1354 !FormatTok->TokenText.starts_with("//")) { 1355 FormatTok->setFinalizedType(TT_ImplicitStringLiteral); 1356 } 1357 nextToken(); 1358 } 1359 } 1360 if (FormatTok->is(tok::semi)) { 1361 nextToken(); 1362 break; 1363 } 1364 nextToken(); 1365 } 1366 1367 addUnwrappedLine(); 1368 return true; 1369 } 1370 1371 // readTokenWithJavaScriptASI reads the next token and terminates the current 1372 // line if JavaScript Automatic Semicolon Insertion must 1373 // happen between the current token and the next token. 1374 // 1375 // This method is conservative - it cannot cover all edge cases of JavaScript, 1376 // but only aims to correctly handle certain well known cases. It *must not* 1377 // return true in speculative cases. 1378 void UnwrappedLineParser::readTokenWithJavaScriptASI() { 1379 FormatToken *Previous = FormatTok; 1380 readToken(); 1381 FormatToken *Next = FormatTok; 1382 1383 bool IsOnSameLine = 1384 CommentsBeforeNextToken.empty() 1385 ? 
Next->NewlinesBefore == 0 1386 : CommentsBeforeNextToken.front()->NewlinesBefore == 0; 1387 if (IsOnSameLine) 1388 return; 1389 1390 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); 1391 bool PreviousStartsTemplateExpr = 1392 Previous->is(TT_TemplateString) && Previous->TokenText.ends_with("${"); 1393 if (PreviousMustBeValue || Previous->is(tok::r_paren)) { 1394 // If the line contains an '@' sign, the previous token might be an 1395 // annotation, which can precede another identifier/value. 1396 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) { 1397 return LineNode.Tok->is(tok::at); 1398 }); 1399 if (HasAt) 1400 return; 1401 } 1402 if (Next->is(tok::exclaim) && PreviousMustBeValue) 1403 return addUnwrappedLine(); 1404 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); 1405 bool NextEndsTemplateExpr = 1406 Next->is(TT_TemplateString) && Next->TokenText.starts_with("}"); 1407 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && 1408 (PreviousMustBeValue || 1409 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, 1410 tok::minusminus))) { 1411 return addUnwrappedLine(); 1412 } 1413 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && 1414 isJSDeclOrStmt(Keywords, Next)) { 1415 return addUnwrappedLine(); 1416 } 1417 } 1418 1419 void UnwrappedLineParser::parseStructuralElement( 1420 const FormatToken *OpeningBrace, IfStmtKind *IfKind, 1421 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) { 1422 if (Style.Language == FormatStyle::LK_TableGen && 1423 FormatTok->is(tok::pp_include)) { 1424 nextToken(); 1425 if (FormatTok->is(tok::string_literal)) 1426 nextToken(); 1427 addUnwrappedLine(); 1428 return; 1429 } 1430 1431 if (IsCpp) { 1432 while (FormatTok->is(tok::l_square) && handleCppAttributes()) { 1433 } 1434 } else if (Style.isVerilog()) { 1435 if (Keywords.isVerilogStructuredProcedure(*FormatTok)) { 1436 parseForOrWhileLoop(/*HasParens=*/false); 1437 return; 1438 
} 1439 if (FormatTok->isOneOf(Keywords.kw_foreach, Keywords.kw_repeat)) { 1440 parseForOrWhileLoop(); 1441 return; 1442 } 1443 if (FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert, 1444 Keywords.kw_assume, Keywords.kw_cover)) { 1445 parseIfThenElse(IfKind, /*KeepBraces=*/false, /*IsVerilogAssert=*/true); 1446 return; 1447 } 1448 1449 // Skip things that can exist before keywords like 'if' and 'case'. 1450 while (true) { 1451 if (FormatTok->isOneOf(Keywords.kw_priority, Keywords.kw_unique, 1452 Keywords.kw_unique0)) { 1453 nextToken(); 1454 } else if (FormatTok->is(tok::l_paren) && 1455 Tokens->peekNextToken()->is(tok::star)) { 1456 parseParens(); 1457 } else { 1458 break; 1459 } 1460 } 1461 } 1462 1463 // Tokens that only make sense at the beginning of a line. 1464 if (FormatTok->isAccessSpecifierKeyword()) { 1465 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 1466 Style.isCSharp()) { 1467 nextToken(); 1468 } else { 1469 parseAccessSpecifier(); 1470 } 1471 return; 1472 } 1473 switch (FormatTok->Tok.getKind()) { 1474 case tok::kw_asm: 1475 nextToken(); 1476 if (FormatTok->is(tok::l_brace)) { 1477 FormatTok->setFinalizedType(TT_InlineASMBrace); 1478 nextToken(); 1479 while (FormatTok && !eof()) { 1480 if (FormatTok->is(tok::r_brace)) { 1481 FormatTok->setFinalizedType(TT_InlineASMBrace); 1482 nextToken(); 1483 addUnwrappedLine(); 1484 break; 1485 } 1486 FormatTok->Finalized = true; 1487 nextToken(); 1488 } 1489 } 1490 break; 1491 case tok::kw_namespace: 1492 parseNamespace(); 1493 return; 1494 case tok::kw_if: { 1495 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1496 // field/method declaration. 1497 break; 1498 } 1499 FormatToken *Tok = parseIfThenElse(IfKind); 1500 if (IfLeftBrace) 1501 *IfLeftBrace = Tok; 1502 return; 1503 } 1504 case tok::kw_for: 1505 case tok::kw_while: 1506 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1507 // field/method declaration. 
1508 break; 1509 } 1510 parseForOrWhileLoop(); 1511 return; 1512 case tok::kw_do: 1513 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1514 // field/method declaration. 1515 break; 1516 } 1517 parseDoWhile(); 1518 if (HasDoWhile) 1519 *HasDoWhile = true; 1520 return; 1521 case tok::kw_switch: 1522 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1523 // 'switch: string' field declaration. 1524 break; 1525 } 1526 parseSwitch(/*IsExpr=*/false); 1527 return; 1528 case tok::kw_default: { 1529 // In Verilog default along with other labels are handled in the next loop. 1530 if (Style.isVerilog()) 1531 break; 1532 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1533 // 'default: string' field declaration. 1534 break; 1535 } 1536 auto *Default = FormatTok; 1537 nextToken(); 1538 if (FormatTok->is(tok::colon)) { 1539 FormatTok->setFinalizedType(TT_CaseLabelColon); 1540 parseLabel(); 1541 return; 1542 } 1543 if (FormatTok->is(tok::arrow)) { 1544 FormatTok->setFinalizedType(TT_CaseLabelArrow); 1545 Default->setFinalizedType(TT_SwitchExpressionLabel); 1546 parseLabel(); 1547 return; 1548 } 1549 // e.g. "default void f() {}" in a Java interface. 1550 break; 1551 } 1552 case tok::kw_case: 1553 // Proto: there are no switch/case statements. 1554 if (Style.Language == FormatStyle::LK_Proto) { 1555 nextToken(); 1556 return; 1557 } 1558 if (Style.isVerilog()) { 1559 parseBlock(); 1560 addUnwrappedLine(); 1561 return; 1562 } 1563 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1564 // 'case: string' field declaration. 1565 nextToken(); 1566 break; 1567 } 1568 parseCaseLabel(); 1569 return; 1570 case tok::kw_try: 1571 case tok::kw___try: 1572 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1573 // field/method declaration. 1574 break; 1575 } 1576 parseTryCatch(); 1577 return; 1578 case tok::kw_extern: 1579 nextToken(); 1580 if (Style.isVerilog()) { 1581 // In Verilog and extern module declaration looks like a start of module. 
1582 // But there is no body and endmodule. So we handle it separately. 1583 if (Keywords.isVerilogHierarchy(*FormatTok)) { 1584 parseVerilogHierarchyHeader(); 1585 return; 1586 } 1587 } else if (FormatTok->is(tok::string_literal)) { 1588 nextToken(); 1589 if (FormatTok->is(tok::l_brace)) { 1590 if (Style.BraceWrapping.AfterExternBlock) 1591 addUnwrappedLine(); 1592 // Either we indent or for backwards compatibility we follow the 1593 // AfterExternBlock style. 1594 unsigned AddLevels = 1595 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) || 1596 (Style.BraceWrapping.AfterExternBlock && 1597 Style.IndentExternBlock == 1598 FormatStyle::IEBS_AfterExternBlock) 1599 ? 1u 1600 : 0u; 1601 parseBlock(/*MustBeDeclaration=*/true, AddLevels); 1602 addUnwrappedLine(); 1603 return; 1604 } 1605 } 1606 break; 1607 case tok::kw_export: 1608 if (Style.isJavaScript()) { 1609 parseJavaScriptEs6ImportExport(); 1610 return; 1611 } 1612 if (IsCpp) { 1613 nextToken(); 1614 if (FormatTok->is(tok::kw_namespace)) { 1615 parseNamespace(); 1616 return; 1617 } 1618 if (FormatTok->is(Keywords.kw_import) && parseModuleImport()) 1619 return; 1620 } 1621 break; 1622 case tok::kw_inline: 1623 nextToken(); 1624 if (FormatTok->is(tok::kw_namespace)) { 1625 parseNamespace(); 1626 return; 1627 } 1628 break; 1629 case tok::identifier: 1630 if (FormatTok->is(TT_ForEachMacro)) { 1631 parseForOrWhileLoop(); 1632 return; 1633 } 1634 if (FormatTok->is(TT_MacroBlockBegin)) { 1635 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 1636 /*MunchSemi=*/false); 1637 return; 1638 } 1639 if (FormatTok->is(Keywords.kw_import)) { 1640 if (Style.isJavaScript()) { 1641 parseJavaScriptEs6ImportExport(); 1642 return; 1643 } 1644 if (Style.Language == FormatStyle::LK_Proto) { 1645 nextToken(); 1646 if (FormatTok->is(tok::kw_public)) 1647 nextToken(); 1648 if (FormatTok->isNot(tok::string_literal)) 1649 return; 1650 nextToken(); 1651 if (FormatTok->is(tok::semi)) 1652 nextToken(); 1653 addUnwrappedLine(); 
1654 return; 1655 } 1656 if (IsCpp && parseModuleImport()) 1657 return; 1658 } 1659 if (IsCpp && FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, 1660 Keywords.kw_slots, Keywords.kw_qslots)) { 1661 nextToken(); 1662 if (FormatTok->is(tok::colon)) { 1663 nextToken(); 1664 addUnwrappedLine(); 1665 return; 1666 } 1667 } 1668 if (IsCpp && FormatTok->is(TT_StatementMacro)) { 1669 parseStatementMacro(); 1670 return; 1671 } 1672 if (IsCpp && FormatTok->is(TT_NamespaceMacro)) { 1673 parseNamespace(); 1674 return; 1675 } 1676 // In Verilog labels can be any expression, so we don't do them here. 1677 // JS doesn't have macros, and within classes colons indicate fields, not 1678 // labels. 1679 // TableGen doesn't have labels. 1680 if (!Style.isJavaScript() && !Style.isVerilog() && !Style.isTableGen() && 1681 Tokens->peekNextToken()->is(tok::colon) && !Line->MustBeDeclaration) { 1682 nextToken(); 1683 if (!Line->InMacroBody || CurrentLines->size() > 1) 1684 Line->Tokens.begin()->Tok->MustBreakBefore = true; 1685 FormatTok->setFinalizedType(TT_GotoLabelColon); 1686 parseLabel(!Style.IndentGotoLabels); 1687 if (HasLabel) 1688 *HasLabel = true; 1689 return; 1690 } 1691 // In all other cases, parse the declaration. 
1692 break; 1693 default: 1694 break; 1695 } 1696 1697 for (const bool InRequiresExpression = 1698 OpeningBrace && OpeningBrace->is(TT_RequiresExpressionLBrace); 1699 !eof();) { 1700 if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) { 1701 if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true); 1702 Next && Next->isBinaryOperator()) { 1703 FormatTok->Tok.setKind(tok::identifier); 1704 } 1705 } 1706 const FormatToken *Previous = FormatTok->Previous; 1707 switch (FormatTok->Tok.getKind()) { 1708 case tok::at: 1709 nextToken(); 1710 if (FormatTok->is(tok::l_brace)) { 1711 nextToken(); 1712 parseBracedList(); 1713 break; 1714 } else if (Style.Language == FormatStyle::LK_Java && 1715 FormatTok->is(Keywords.kw_interface)) { 1716 nextToken(); 1717 break; 1718 } 1719 switch (FormatTok->Tok.getObjCKeywordID()) { 1720 case tok::objc_public: 1721 case tok::objc_protected: 1722 case tok::objc_package: 1723 case tok::objc_private: 1724 return parseAccessSpecifier(); 1725 case tok::objc_interface: 1726 case tok::objc_implementation: 1727 return parseObjCInterfaceOrImplementation(); 1728 case tok::objc_protocol: 1729 if (parseObjCProtocol()) 1730 return; 1731 break; 1732 case tok::objc_end: 1733 return; // Handled by the caller. 
1734 case tok::objc_optional: 1735 case tok::objc_required: 1736 nextToken(); 1737 addUnwrappedLine(); 1738 return; 1739 case tok::objc_autoreleasepool: 1740 nextToken(); 1741 if (FormatTok->is(tok::l_brace)) { 1742 if (Style.BraceWrapping.AfterControlStatement == 1743 FormatStyle::BWACS_Always) { 1744 addUnwrappedLine(); 1745 } 1746 parseBlock(); 1747 } 1748 addUnwrappedLine(); 1749 return; 1750 case tok::objc_synchronized: 1751 nextToken(); 1752 if (FormatTok->is(tok::l_paren)) { 1753 // Skip synchronization object 1754 parseParens(); 1755 } 1756 if (FormatTok->is(tok::l_brace)) { 1757 if (Style.BraceWrapping.AfterControlStatement == 1758 FormatStyle::BWACS_Always) { 1759 addUnwrappedLine(); 1760 } 1761 parseBlock(); 1762 } 1763 addUnwrappedLine(); 1764 return; 1765 case tok::objc_try: 1766 // This branch isn't strictly necessary (the kw_try case below would 1767 // do this too after the tok::at is parsed above). But be explicit. 1768 parseTryCatch(); 1769 return; 1770 default: 1771 break; 1772 } 1773 break; 1774 case tok::kw_requires: { 1775 if (IsCpp) { 1776 bool ParsedClause = parseRequires(); 1777 if (ParsedClause) 1778 return; 1779 } else { 1780 nextToken(); 1781 } 1782 break; 1783 } 1784 case tok::kw_enum: 1785 // Ignore if this is part of "template <enum ..." or "... -> enum" or 1786 // "template <..., enum ...>". 1787 if (Previous && Previous->isOneOf(tok::less, tok::arrow, tok::comma)) { 1788 nextToken(); 1789 break; 1790 } 1791 1792 // parseEnum falls through and does not yet add an unwrapped line as an 1793 // enum definition can start a structural element. 1794 if (!parseEnum()) 1795 break; 1796 // This only applies to C++ and Verilog. 
1797 if (!IsCpp && !Style.isVerilog()) { 1798 addUnwrappedLine(); 1799 return; 1800 } 1801 break; 1802 case tok::kw_typedef: 1803 nextToken(); 1804 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 1805 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS, 1806 Keywords.kw_CF_CLOSED_ENUM, 1807 Keywords.kw_NS_CLOSED_ENUM)) { 1808 parseEnum(); 1809 } 1810 break; 1811 case tok::kw_class: 1812 if (Style.isVerilog()) { 1813 parseBlock(); 1814 addUnwrappedLine(); 1815 return; 1816 } 1817 if (Style.isTableGen()) { 1818 // Do nothing special. In this case the l_brace becomes FunctionLBrace. 1819 // This is same as def and so on. 1820 nextToken(); 1821 break; 1822 } 1823 [[fallthrough]]; 1824 case tok::kw_struct: 1825 case tok::kw_union: 1826 if (parseStructLike()) 1827 return; 1828 break; 1829 case tok::kw_decltype: 1830 nextToken(); 1831 if (FormatTok->is(tok::l_paren)) { 1832 parseParens(); 1833 assert(FormatTok->Previous); 1834 if (FormatTok->Previous->endsSequence(tok::r_paren, tok::kw_auto, 1835 tok::l_paren)) { 1836 Line->SeenDecltypeAuto = true; 1837 } 1838 } 1839 break; 1840 case tok::period: 1841 nextToken(); 1842 // In Java, classes have an implicit static member "class". 1843 if (Style.Language == FormatStyle::LK_Java && FormatTok && 1844 FormatTok->is(tok::kw_class)) { 1845 nextToken(); 1846 } 1847 if (Style.isJavaScript() && FormatTok && 1848 FormatTok->Tok.getIdentifierInfo()) { 1849 // JavaScript only has pseudo keywords, all keywords are allowed to 1850 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 1851 nextToken(); 1852 } 1853 break; 1854 case tok::semi: 1855 nextToken(); 1856 addUnwrappedLine(); 1857 return; 1858 case tok::r_brace: 1859 addUnwrappedLine(); 1860 return; 1861 case tok::l_paren: { 1862 parseParens(); 1863 // Break the unwrapped line if a K&R C function definition has a parameter 1864 // declaration. 
1865 if (OpeningBrace || !IsCpp || !Previous || eof()) 1866 break; 1867 if (isC78ParameterDecl(FormatTok, 1868 Tokens->peekNextToken(/*SkipComment=*/true), 1869 Previous)) { 1870 addUnwrappedLine(); 1871 return; 1872 } 1873 break; 1874 } 1875 case tok::kw_operator: 1876 nextToken(); 1877 if (FormatTok->isBinaryOperator()) 1878 nextToken(); 1879 break; 1880 case tok::caret: 1881 nextToken(); 1882 // Block return type. 1883 if (FormatTok->Tok.isAnyIdentifier() || FormatTok->isTypeName(LangOpts)) { 1884 nextToken(); 1885 // Return types: pointers are ok too. 1886 while (FormatTok->is(tok::star)) 1887 nextToken(); 1888 } 1889 // Block argument list. 1890 if (FormatTok->is(tok::l_paren)) 1891 parseParens(); 1892 // Block body. 1893 if (FormatTok->is(tok::l_brace)) 1894 parseChildBlock(); 1895 break; 1896 case tok::l_brace: 1897 if (InRequiresExpression) 1898 FormatTok->setFinalizedType(TT_BracedListLBrace); 1899 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) { 1900 IsDecltypeAutoFunction = Line->SeenDecltypeAuto; 1901 // A block outside of parentheses must be the last part of a 1902 // structural element. 1903 // FIXME: Figure out cases where this is not true, and add projections 1904 // for them (the one we know is missing are lambdas). 1905 if (Style.Language == FormatStyle::LK_Java && 1906 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) { 1907 // If necessary, we could set the type to something different than 1908 // TT_FunctionLBrace. 1909 if (Style.BraceWrapping.AfterControlStatement == 1910 FormatStyle::BWACS_Always) { 1911 addUnwrappedLine(); 1912 } 1913 } else if (Style.BraceWrapping.AfterFunction) { 1914 addUnwrappedLine(); 1915 } 1916 if (!Previous || Previous->isNot(TT_TypeDeclarationParen)) 1917 FormatTok->setFinalizedType(TT_FunctionLBrace); 1918 parseBlock(); 1919 IsDecltypeAutoFunction = false; 1920 addUnwrappedLine(); 1921 return; 1922 } 1923 // Otherwise this was a braced init list, and the structural 1924 // element continues. 
1925 break; 1926 case tok::kw_try: 1927 if (Style.isJavaScript() && Line->MustBeDeclaration) { 1928 // field/method declaration. 1929 nextToken(); 1930 break; 1931 } 1932 // We arrive here when parsing function-try blocks. 1933 if (Style.BraceWrapping.AfterFunction) 1934 addUnwrappedLine(); 1935 parseTryCatch(); 1936 return; 1937 case tok::identifier: { 1938 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) && 1939 Line->MustBeDeclaration) { 1940 addUnwrappedLine(); 1941 parseCSharpGenericTypeConstraint(); 1942 break; 1943 } 1944 if (FormatTok->is(TT_MacroBlockEnd)) { 1945 addUnwrappedLine(); 1946 return; 1947 } 1948 1949 // Function declarations (as opposed to function expressions) are parsed 1950 // on their own unwrapped line by continuing this loop. Function 1951 // expressions (functions that are not on their own line) must not create 1952 // a new unwrapped line, so they are special cased below. 1953 size_t TokenCount = Line->Tokens.size(); 1954 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) && 1955 (TokenCount > 1 || 1956 (TokenCount == 1 && 1957 Line->Tokens.front().Tok->isNot(Keywords.kw_async)))) { 1958 tryToParseJSFunction(); 1959 break; 1960 } 1961 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) && 1962 FormatTok->is(Keywords.kw_interface)) { 1963 if (Style.isJavaScript()) { 1964 // In JavaScript/TypeScript, "interface" can be used as a standalone 1965 // identifier, e.g. in `var interface = 1;`. If "interface" is 1966 // followed by another identifier, it is very like to be an actual 1967 // interface declaration. 
1968 unsigned StoredPosition = Tokens->getPosition(); 1969 FormatToken *Next = Tokens->getNextToken(); 1970 FormatTok = Tokens->setPosition(StoredPosition); 1971 if (!mustBeJSIdent(Keywords, Next)) { 1972 nextToken(); 1973 break; 1974 } 1975 } 1976 parseRecord(); 1977 addUnwrappedLine(); 1978 return; 1979 } 1980 1981 if (Style.isVerilog()) { 1982 if (FormatTok->is(Keywords.kw_table)) { 1983 parseVerilogTable(); 1984 return; 1985 } 1986 if (Keywords.isVerilogBegin(*FormatTok) || 1987 Keywords.isVerilogHierarchy(*FormatTok)) { 1988 parseBlock(); 1989 addUnwrappedLine(); 1990 return; 1991 } 1992 } 1993 1994 if (!IsCpp && FormatTok->is(Keywords.kw_interface)) { 1995 if (parseStructLike()) 1996 return; 1997 break; 1998 } 1999 2000 if (IsCpp && FormatTok->is(TT_StatementMacro)) { 2001 parseStatementMacro(); 2002 return; 2003 } 2004 2005 // See if the following token should start a new unwrapped line. 2006 StringRef Text = FormatTok->TokenText; 2007 2008 FormatToken *PreviousToken = FormatTok; 2009 nextToken(); 2010 2011 // JS doesn't have macros, and within classes colons indicate fields, not 2012 // labels. 2013 if (Style.isJavaScript()) 2014 break; 2015 2016 auto OneTokenSoFar = [&]() { 2017 auto I = Line->Tokens.begin(), E = Line->Tokens.end(); 2018 while (I != E && I->Tok->is(tok::comment)) 2019 ++I; 2020 if (Style.isVerilog()) 2021 while (I != E && I->Tok->is(tok::hash)) 2022 ++I; 2023 return I != E && (++I == E); 2024 }; 2025 if (OneTokenSoFar()) { 2026 // Recognize function-like macro usages without trailing semicolon as 2027 // well as free-standing macros like Q_OBJECT. 2028 bool FunctionLike = FormatTok->is(tok::l_paren); 2029 if (FunctionLike) 2030 parseParens(); 2031 2032 bool FollowedByNewline = 2033 CommentsBeforeNextToken.empty() 2034 ? 
FormatTok->NewlinesBefore > 0 2035 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 2036 2037 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && 2038 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { 2039 if (PreviousToken->isNot(TT_UntouchableMacroFunc)) 2040 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); 2041 addUnwrappedLine(); 2042 return; 2043 } 2044 } 2045 break; 2046 } 2047 case tok::equal: 2048 if ((Style.isJavaScript() || Style.isCSharp()) && 2049 FormatTok->is(TT_FatArrow)) { 2050 tryToParseChildBlock(); 2051 break; 2052 } 2053 2054 nextToken(); 2055 if (FormatTok->is(tok::l_brace)) { 2056 // Block kind should probably be set to BK_BracedInit for any language. 2057 // C# needs this change to ensure that array initialisers and object 2058 // initialisers are indented the same way. 2059 if (Style.isCSharp()) 2060 FormatTok->setBlockKind(BK_BracedInit); 2061 // TableGen's defset statement has syntax of the form, 2062 // `defset <type> <name> = { <statement>... }` 2063 if (Style.isTableGen() && 2064 Line->Tokens.begin()->Tok->is(Keywords.kw_defset)) { 2065 FormatTok->setFinalizedType(TT_FunctionLBrace); 2066 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u, 2067 /*MunchSemi=*/false); 2068 addUnwrappedLine(); 2069 break; 2070 } 2071 nextToken(); 2072 parseBracedList(); 2073 } else if (Style.Language == FormatStyle::LK_Proto && 2074 FormatTok->is(tok::less)) { 2075 nextToken(); 2076 parseBracedList(/*IsAngleBracket=*/true); 2077 } 2078 break; 2079 case tok::l_square: 2080 parseSquare(); 2081 break; 2082 case tok::kw_new: 2083 parseNew(); 2084 break; 2085 case tok::kw_switch: 2086 if (Style.Language == FormatStyle::LK_Java) 2087 parseSwitch(/*IsExpr=*/true); 2088 nextToken(); 2089 break; 2090 case tok::kw_case: 2091 // Proto: there are no switch/case statements. 2092 if (Style.Language == FormatStyle::LK_Proto) { 2093 nextToken(); 2094 return; 2095 } 2096 // In Verilog switch is called case. 
// (This span opens inside UnwrappedLineParser::parseStructuralElement(), in
// the handling of `case tok::kw_case:`; the function begins above.)
      if (Style.isVerilog()) {
        parseBlock();
        addUnwrappedLine();
        return;
      }
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // 'case: string' field declaration.
        nextToken();
        break;
      }
      parseCaseLabel();
      break;
    case tok::kw_default:
      nextToken();
      if (Style.isVerilog()) {
        if (FormatTok->is(tok::colon)) {
          // The label will be handled in the next iteration.
          break;
        }
        if (FormatTok->is(Keywords.kw_clocking)) {
          // A default clocking block.
          parseBlock();
          addUnwrappedLine();
          return;
        }
        parseVerilogCaseLabel();
        return;
      }
      break;
    case tok::colon:
      nextToken();
      if (Style.isVerilog()) {
        parseVerilogCaseLabel();
        return;
      }
      break;
    default:
      nextToken();
      break;
    }
  }
}

/// Tries to parse a C# property accessor block starting at the current `{`.
///
/// The function first looks ahead through the token source (restoring the
/// stored position afterwards) to find out whether the braces contain any of
/// `get`, `set` or `init`, and whether the accessor is "trivial", i.e. made up
/// only of access specifiers, accessor keywords and semicolons before the
/// closing `}`.
///
/// \returns false when the style is not C#, when the `{` is not preceded by an
/// identifier, or when no accessor keyword was seen during the look-ahead.
/// Otherwise the accessor is parsed (normally up to and including the closing
/// `}` and an optional `= ...;` initializer) and true is returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  // NOTE(review): `Previous` is dereferenced without a null check; presumably
  // a `{` reaching this point always has a predecessor - confirm.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
  // Track these as they do not require line breaks to be introduced.
  bool HasSpecialAccessor = false;
  bool IsTrivialPropertyAccessor = true;
  // Look-ahead only: `Tok` walks the token source while `FormatTok` stays on
  // the `{`. The loop stops at the first token that is not an access
  // specifier, accessor keyword or `;`.
  while (!eof()) {
    if (Tok->isAccessSpecifierKeyword() ||
        Tok->isOneOf(tok::semi, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_init, Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
        HasSpecialAccessor = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // Anything other than the closing brace means the accessor has a body or
    // other content, so it is not trivial.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  if (!HasSpecialAccessor) {
    // Not an accessor: rewind the token source and let the caller handle `{`.
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor list; also swallow a trailing `= value;`
      // initializer so that it stays on the same unwrapped line.
      nextToken();
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor with a block body.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      if (FormatTok->is(TT_FatArrow)) {
        // Expression-bodied accessor: consume everything through the `;`.
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
                             Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}

/// Tries to parse a C++ lambda starting at the current `[`.
///
/// For non-C++ input the `[` is consumed and false is returned. False is also
/// returned when tryToParseLambdaIntroducer() rejects the introducer. After a
/// successful introducer, tokens are consumed until either a `{` is reached
/// (it is then typed as the lambda body and parsed as a child block) or a
/// token is met that cannot belong to a lambda declarator; both of these
/// outcomes return true, since tokens have been consumed.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!IsCpp) {
    nextToken();
    return false;
  }
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isTypeName(LangOpts) || FormatTok->isAttribute()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_PointerOrReference);
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::less:
      assert(FormatTok->Previous);
      // A `<` directly after the introducer's `]` opens the lambda's template
      // parameter list.
      if (FormatTok->Previous->is(tok::r_square))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::kw_auto:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_template:
    case tok::kw_typename:
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::kw_constexpr:
    case tok::kw_consteval:
    case tok::comma:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_static:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
2325 FormatTok->setFinalizedType(TT_TrailingReturnArrow); 2326 SeenArrow = true; 2327 nextToken(); 2328 break; 2329 case tok::kw_requires: { 2330 auto *RequiresToken = FormatTok; 2331 nextToken(); 2332 parseRequiresClause(RequiresToken); 2333 break; 2334 } 2335 case tok::equal: 2336 if (!InTemplateParameterList) 2337 return true; 2338 nextToken(); 2339 break; 2340 default: 2341 return true; 2342 } 2343 } 2344 2345 FormatTok->setFinalizedType(TT_LambdaLBrace); 2346 LSquare.setFinalizedType(TT_LambdaLSquare); 2347 2348 NestedLambdas.push_back(Line->SeenDecltypeAuto); 2349 parseChildBlock(); 2350 assert(!NestedLambdas.empty()); 2351 NestedLambdas.pop_back(); 2352 2353 return true; 2354 } 2355 2356 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 2357 const FormatToken *Previous = FormatTok->Previous; 2358 const FormatToken *LeftSquare = FormatTok; 2359 nextToken(); 2360 if ((Previous && ((Previous->Tok.getIdentifierInfo() && 2361 !Previous->isOneOf(tok::kw_return, tok::kw_co_await, 2362 tok::kw_co_yield, tok::kw_co_return)) || 2363 Previous->closesScope())) || 2364 LeftSquare->isCppStructuredBinding(IsCpp)) { 2365 return false; 2366 } 2367 if (FormatTok->is(tok::l_square) || tok::isLiteral(FormatTok->Tok.getKind())) 2368 return false; 2369 if (FormatTok->is(tok::r_square)) { 2370 const FormatToken *Next = Tokens->peekNextToken(/*SkipComment=*/true); 2371 if (Next->is(tok::greater)) 2372 return false; 2373 } 2374 parseSquare(/*LambdaIntroducer=*/true); 2375 return true; 2376 } 2377 2378 void UnwrappedLineParser::tryToParseJSFunction() { 2379 assert(FormatTok->is(Keywords.kw_function)); 2380 if (FormatTok->is(Keywords.kw_async)) 2381 nextToken(); 2382 // Consume "function". 2383 nextToken(); 2384 2385 // Consume * (generator function). Treat it like C++'s overloaded operators. 2386 if (FormatTok->is(tok::star)) { 2387 FormatTok->setFinalizedType(TT_OverloadedOperator); 2388 nextToken(); 2389 } 2390 2391 // Consume function name. 
2392 if (FormatTok->is(tok::identifier)) 2393 nextToken(); 2394 2395 if (FormatTok->isNot(tok::l_paren)) 2396 return; 2397 2398 // Parse formal parameter list. 2399 parseParens(); 2400 2401 if (FormatTok->is(tok::colon)) { 2402 // Parse a type definition. 2403 nextToken(); 2404 2405 // Eat the type declaration. For braced inline object types, balance braces, 2406 // otherwise just parse until finding an l_brace for the function body. 2407 if (FormatTok->is(tok::l_brace)) 2408 tryToParseBracedList(); 2409 else 2410 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) 2411 nextToken(); 2412 } 2413 2414 if (FormatTok->is(tok::semi)) 2415 return; 2416 2417 parseChildBlock(); 2418 } 2419 2420 bool UnwrappedLineParser::tryToParseBracedList() { 2421 if (FormatTok->is(BK_Unknown)) 2422 calculateBraceTypes(); 2423 assert(FormatTok->isNot(BK_Unknown)); 2424 if (FormatTok->is(BK_Block)) 2425 return false; 2426 nextToken(); 2427 parseBracedList(); 2428 return true; 2429 } 2430 2431 bool UnwrappedLineParser::tryToParseChildBlock() { 2432 assert(Style.isJavaScript() || Style.isCSharp()); 2433 assert(FormatTok->is(TT_FatArrow)); 2434 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. 2435 // They always start an expression or a child block if followed by a curly 2436 // brace. 2437 nextToken(); 2438 if (FormatTok->isNot(tok::l_brace)) 2439 return false; 2440 parseChildBlock(); 2441 return true; 2442 } 2443 2444 bool UnwrappedLineParser::parseBracedList(bool IsAngleBracket, bool IsEnum) { 2445 assert(!IsAngleBracket || !IsEnum); 2446 bool HasError = false; 2447 2448 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 2449 // replace this by using parseAssignmentExpression() inside. 
2450 do { 2451 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && 2452 tryToParseChildBlock()) { 2453 continue; 2454 } 2455 if (Style.isJavaScript()) { 2456 if (FormatTok->is(Keywords.kw_function)) { 2457 tryToParseJSFunction(); 2458 continue; 2459 } 2460 if (FormatTok->is(tok::l_brace)) { 2461 // Could be a method inside of a braced list `{a() { return 1; }}`. 2462 if (tryToParseBracedList()) 2463 continue; 2464 parseChildBlock(); 2465 } 2466 } 2467 if (FormatTok->is(IsAngleBracket ? tok::greater : tok::r_brace)) { 2468 if (IsEnum) { 2469 FormatTok->setBlockKind(BK_Block); 2470 if (!Style.AllowShortEnumsOnASingleLine) 2471 addUnwrappedLine(); 2472 } 2473 nextToken(); 2474 return !HasError; 2475 } 2476 switch (FormatTok->Tok.getKind()) { 2477 case tok::l_square: 2478 if (Style.isCSharp()) 2479 parseSquare(); 2480 else 2481 tryToParseLambda(); 2482 break; 2483 case tok::l_paren: 2484 parseParens(); 2485 // JavaScript can just have free standing methods and getters/setters in 2486 // object literals. Detect them by a "{" following ")". 2487 if (Style.isJavaScript()) { 2488 if (FormatTok->is(tok::l_brace)) 2489 parseChildBlock(); 2490 break; 2491 } 2492 break; 2493 case tok::l_brace: 2494 // Assume there are no blocks inside a braced init list apart 2495 // from the ones we explicitly parse out (like lambdas). 2496 FormatTok->setBlockKind(BK_BracedInit); 2497 nextToken(); 2498 parseBracedList(); 2499 break; 2500 case tok::less: 2501 nextToken(); 2502 if (IsAngleBracket) 2503 parseBracedList(/*IsAngleBracket=*/true); 2504 break; 2505 case tok::semi: 2506 // JavaScript (or more precisely TypeScript) can have semicolons in braced 2507 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be 2508 // used for error recovery if we have otherwise determined that this is 2509 // a braced list. 
2510 if (Style.isJavaScript()) { 2511 nextToken(); 2512 break; 2513 } 2514 HasError = true; 2515 if (!IsEnum) 2516 return false; 2517 nextToken(); 2518 break; 2519 case tok::comma: 2520 nextToken(); 2521 if (IsEnum && !Style.AllowShortEnumsOnASingleLine) 2522 addUnwrappedLine(); 2523 break; 2524 default: 2525 nextToken(); 2526 break; 2527 } 2528 } while (!eof()); 2529 return false; 2530 } 2531 2532 /// \brief Parses a pair of parentheses (and everything between them). 2533 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all 2534 /// double ampersands. This applies for all nested scopes as well. 2535 /// 2536 /// Returns whether there is a `=` token between the parentheses. 2537 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { 2538 assert(FormatTok->is(tok::l_paren) && "'(' expected."); 2539 auto *LeftParen = FormatTok; 2540 bool SeenEqual = false; 2541 bool MightBeFoldExpr = false; 2542 const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace); 2543 nextToken(); 2544 do { 2545 switch (FormatTok->Tok.getKind()) { 2546 case tok::l_paren: 2547 if (parseParens(AmpAmpTokenType)) 2548 SeenEqual = true; 2549 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 2550 parseChildBlock(); 2551 break; 2552 case tok::r_paren: { 2553 const auto *Prev = LeftParen->Previous; 2554 if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody && 2555 Style.RemoveParentheses > FormatStyle::RPS_Leave) { 2556 const auto *Next = Tokens->peekNextToken(); 2557 const bool DoubleParens = 2558 Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren); 2559 const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; 2560 const bool Blacklisted = 2561 PrevPrev && 2562 (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || 2563 (SeenEqual && 2564 (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || 2565 PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); 2566 const bool ReturnParens = 2567 Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement && 2568 ((NestedLambdas.empty() && !IsDecltypeAutoFunction) || 2569 (!NestedLambdas.empty() && !NestedLambdas.back())) && 2570 Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next && 2571 Next->is(tok::semi); 2572 if ((DoubleParens && !Blacklisted) || ReturnParens) { 2573 LeftParen->Optional = true; 2574 FormatTok->Optional = true; 2575 } 2576 } 2577 if (Prev && Prev->is(TT_TypenameMacro)) { 2578 LeftParen->setFinalizedType(TT_TypeDeclarationParen); 2579 FormatTok->setFinalizedType(TT_TypeDeclarationParen); 2580 } 2581 nextToken(); 2582 return SeenEqual; 2583 } 2584 case tok::r_brace: 2585 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2586 return SeenEqual; 2587 case tok::l_square: 2588 tryToParseLambda(); 2589 break; 2590 case tok::l_brace: 2591 if (!tryToParseBracedList()) 2592 parseChildBlock(); 2593 break; 2594 case tok::at: 2595 nextToken(); 2596 if (FormatTok->is(tok::l_brace)) { 2597 nextToken(); 2598 parseBracedList(); 2599 } 2600 break; 2601 case tok::ellipsis: 2602 MightBeFoldExpr = true; 2603 nextToken(); 2604 break; 2605 case tok::equal: 2606 SeenEqual = true; 2607 if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) 2608 tryToParseChildBlock(); 2609 else 2610 nextToken(); 2611 break; 2612 case tok::kw_class: 2613 if (Style.isJavaScript()) 2614 parseRecord(/*ParseAsExpr=*/true); 2615 else 2616 nextToken(); 2617 break; 2618 case tok::identifier: 2619 if (Style.isJavaScript() && (FormatTok->is(Keywords.kw_function))) 2620 tryToParseJSFunction(); 2621 else 2622 nextToken(); 2623 break; 2624 case tok::kw_switch: 2625 parseSwitch(/*IsExpr=*/true); 2626 break; 2627 case tok::kw_requires: { 2628 auto RequiresToken = FormatTok; 2629 nextToken(); 2630 parseRequiresExpression(RequiresToken); 2631 break; 2632 } 2633 case tok::ampamp: 2634 if (AmpAmpTokenType != TT_Unknown) 2635 FormatTok->setFinalizedType(AmpAmpTokenType); 2636 [[fallthrough]]; 2637 default: 2638 nextToken(); 2639 break; 2640 } 2641 } while (!eof()); 2642 return SeenEqual; 2643 } 2644 2645 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { 2646 if (!LambdaIntroducer) { 2647 assert(FormatTok->is(tok::l_square) && "'[' expected."); 2648 if (tryToParseLambda()) 2649 return; 2650 } 2651 do { 2652 switch (FormatTok->Tok.getKind()) { 2653 case tok::l_paren: 2654 parseParens(); 2655 break; 2656 case tok::r_square: 2657 nextToken(); 2658 return; 2659 case tok::r_brace: 2660 // A "}" inside parenthesis is an error if there wasn't a matching "{". 
2661 return; 2662 case tok::l_square: 2663 parseSquare(); 2664 break; 2665 case tok::l_brace: { 2666 if (!tryToParseBracedList()) 2667 parseChildBlock(); 2668 break; 2669 } 2670 case tok::at: 2671 nextToken(); 2672 if (FormatTok->is(tok::l_brace)) { 2673 nextToken(); 2674 parseBracedList(); 2675 } 2676 break; 2677 default: 2678 nextToken(); 2679 break; 2680 } 2681 } while (!eof()); 2682 } 2683 2684 void UnwrappedLineParser::keepAncestorBraces() { 2685 if (!Style.RemoveBracesLLVM) 2686 return; 2687 2688 const int MaxNestingLevels = 2; 2689 const int Size = NestedTooDeep.size(); 2690 if (Size >= MaxNestingLevels) 2691 NestedTooDeep[Size - MaxNestingLevels] = true; 2692 NestedTooDeep.push_back(false); 2693 } 2694 2695 static FormatToken *getLastNonComment(const UnwrappedLine &Line) { 2696 for (const auto &Token : llvm::reverse(Line.Tokens)) 2697 if (Token.Tok->isNot(tok::comment)) 2698 return Token.Tok; 2699 2700 return nullptr; 2701 } 2702 2703 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) { 2704 FormatToken *Tok = nullptr; 2705 2706 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() && 2707 PreprocessorDirectives.empty() && FormatTok->isNot(tok::semi)) { 2708 Tok = Style.BraceWrapping.AfterControlStatement == FormatStyle::BWACS_Never 2709 ? 
getLastNonComment(*Line) 2710 : Line->Tokens.back().Tok; 2711 assert(Tok); 2712 if (Tok->BraceCount < 0) { 2713 assert(Tok->BraceCount == -1); 2714 Tok = nullptr; 2715 } else { 2716 Tok->BraceCount = -1; 2717 } 2718 } 2719 2720 addUnwrappedLine(); 2721 ++Line->Level; 2722 ++Line->UnbracedBodyLevel; 2723 parseStructuralElement(); 2724 --Line->UnbracedBodyLevel; 2725 2726 if (Tok) { 2727 assert(!Line->InPPDirective); 2728 Tok = nullptr; 2729 for (const auto &L : llvm::reverse(*CurrentLines)) { 2730 if (!L.InPPDirective && getLastNonComment(L)) { 2731 Tok = L.Tokens.back().Tok; 2732 break; 2733 } 2734 } 2735 assert(Tok); 2736 ++Tok->BraceCount; 2737 } 2738 2739 if (CheckEOF && eof()) 2740 addUnwrappedLine(); 2741 2742 --Line->Level; 2743 } 2744 2745 static void markOptionalBraces(FormatToken *LeftBrace) { 2746 if (!LeftBrace) 2747 return; 2748 2749 assert(LeftBrace->is(tok::l_brace)); 2750 2751 FormatToken *RightBrace = LeftBrace->MatchingParen; 2752 if (!RightBrace) { 2753 assert(!LeftBrace->Optional); 2754 return; 2755 } 2756 2757 assert(RightBrace->is(tok::r_brace)); 2758 assert(RightBrace->MatchingParen == LeftBrace); 2759 assert(LeftBrace->Optional == RightBrace->Optional); 2760 2761 LeftBrace->Optional = true; 2762 RightBrace->Optional = true; 2763 } 2764 2765 void UnwrappedLineParser::handleAttributes() { 2766 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`. 2767 if (FormatTok->isAttribute()) 2768 nextToken(); 2769 else if (FormatTok->is(tok::l_square)) 2770 handleCppAttributes(); 2771 } 2772 2773 bool UnwrappedLineParser::handleCppAttributes() { 2774 // Handle [[likely]] / [[unlikely]] attributes. 2775 assert(FormatTok->is(tok::l_square)); 2776 if (!tryToParseSimpleAttribute()) 2777 return false; 2778 parseSquare(); 2779 return true; 2780 } 2781 2782 /// Returns whether \c Tok begins a block. 
// For Verilog a block begins with a Verilog "begin" keyword; for every other
// language it begins with `{`.
bool UnwrappedLineParser::isBlockBegin(const FormatToken &Tok) const {
  // FIXME: rename the function or make
  // Tok.isOneOf(tok::l_brace, TT_MacroBlockBegin) work.
  return Style.isVerilog() ? Keywords.isVerilogBegin(Tok)
                           : Tok.is(tok::l_brace);
}

/// Parses an `if` statement including any `else` / `else if` chain. Also used
/// for Verilog `assert` / `assume` / `cover` / `restrict` statements.
///
/// \param IfKind if non-null and Style.RemoveBracesLLVM is set, receives the
/// shape of the statement: IfOnly, IfElse or IfElseIf.
/// \param KeepBraces forces the braces of this statement to be kept even when
/// Style.RemoveBracesLLVM is set.
/// \param IsVerilogAssert the statement is a Verilog assertion, whose "then"
/// action is optional.
///
/// \returns the `{` of the if-branch (null if that branch has no braces) when
/// Style.RemoveBracesLLVM is set; nullptr otherwise, and also for a Verilog
/// assertion that ends right after its condition.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces,
                                                  bool IsVerilogAssert) {
  assert((FormatTok->is(tok::kw_if) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(tok::kw_restrict, Keywords.kw_assert,
                              Keywords.kw_assume, Keywords.kw_cover))) &&
         "'if' expected");
  nextToken();

  if (IsVerilogAssert) {
    // Handle `assert #0` and `assert final`.
    if (FormatTok->is(Keywords.kw_verilogHash)) {
      nextToken();
      if (FormatTok->is(tok::numeric_constant))
        nextToken();
    } else if (FormatTok->isOneOf(Keywords.kw_final, Keywords.kw_property,
                                  Keywords.kw_sequence)) {
      nextToken();
    }
  }

  // TableGen's if statement has the form of `if <cond> then { ... }`.
  if (Style.isTableGen()) {
    while (!eof() && FormatTok->isNot(Keywords.kw_then)) {
      // Simply skip until then. This range only contains a value.
      nextToken();
    }
  }

  // Handle `if !consteval`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // Braces of `if consteval` are always kept; otherwise they are kept unless
  // brace removal is enabled and the caller did not ask to keep them.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren)) {
      FormatTok->setFinalizedType(TT_ConditionLParen);
      parseParens();
    }
  }
  handleAttributes();
  // The then action is optional in Verilog assert statements.
  if (IsVerilogAssert && FormatTok->is(tok::semi)) {
    nextToken();
    addUnwrappedLine();
    return nullptr;
  }

  bool NeedsUnwrappedLine = false;
  // Pushes this statement's entry on NestedTooDeep (only with
  // RemoveBracesLLVM); it is popped near the end of this function.
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (IsVerilogAssert && FormatTok->is(tok::kw_else)) {
    addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the if-braces when the block was not recorded as removable (no
    // matching brace), when the nesting is too deep, or when the block's body
    // is itself an `if` without a final `else`.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start over for the else-branch: its nesting is judged on its own.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (isBlockBegin(*FormatTok)) {
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      setPreviousRBraceType(TT_ElseRBrace);
      if (FormatTok->is(tok::kw_else)) {
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) {...} }` whose inner braces stay: the outer else
        // braces are marked optional right here, and KeepElseBraces is set so
        // that the common code at the end leaves the rest untouched.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (!IsVerilogAssert && FormatTok->is(tok::kw_if)) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` forces the `if` onto its own,
      // deeper-indented line.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // The nested call pushes and pops its own entry; take ours off for
        // the duration of the call and restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below only concerns brace removal.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: drop the brace pairing that was recorded for the
    // if-block.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}

/// Parses a `try` / `__try` statement together with all handlers that follow
/// it (`catch`, `__except`, `__finally`, `finally` for Java/JavaScript, and
/// the Objective-C `@catch` / `@finally`).
///
/// A function-try-block with a constructor initializer list (`try : a(1) {`)
/// is handled as well.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  bool NeedsUnwrappedLine = false;
  bool HasCtorInitializer = false;
  if (FormatTok->is(tok::colon)) {
    auto *Colon = FormatTok;
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    if (FormatTok->is(tok::identifier)) {
      HasCtorInitializer = true;
      Colon->setFinalizedType(TT_CtorInitializerColon);
    }

    // In case identifiers were removed by clang-tidy, what might follow is
    // multiple commas in sequence - before the first identifier.
    while (FormatTok->is(tok::comma))
      nextToken();

    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }

      // In case identifiers were removed by clang-tidy, what might follow is
      // multiple commas in sequence - after the first identifier.
      while (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
    parseParens();

  // Pushes an entry on NestedTooDeep (only with RemoveBracesLLVM); each exit
  // path of this function pops it again.
  keepAncestorBraces();

  if (FormatTok->is(tok::l_brace)) {
    if (HasCtorInitializer)
      FormatTok->setFinalizedType(TT_FunctionLBrace);
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else if (FormatTok->isNot(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
// (This span opens inside UnwrappedLineParser::parseTryCatch(), in the branch
// taken when `try` is followed by neither `{` nor `catch`; the function
// begins above.)
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // Parse every handler that follows; an `@` (Objective-C) is skipped first.
  while (true) {
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
      break;
    }
    nextToken();
    // Skip the handler's declaration up to its `{`.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
        // Malformed handler without a body: give up, but keep the
        // NestedTooDeep stack balanced.
        if (Style.RemoveBracesLLVM)
          NestedTooDeep.pop_back();
        return;
      }
      nextToken();
    }
    NeedsUnwrappedLine = false;
    Line->MustBeDeclaration = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock();
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }

  if (Style.RemoveBracesLLVM)
    NestedTooDeep.pop_back();

  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}

/// Parses a namespace definition, or a namespace macro invocation, including
/// its braced body.
///
/// The tokens between the keyword and the `{` (name parts, `inline`,
/// attributes, ...) are consumed first. If no `{` follows, nothing else is
/// parsed.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
         "'namespace' expected");

  const FormatToken &InitialToken = *FormatTok;
  nextToken();
  if (InitialToken.is(TT_NamespaceMacro)) {
    parseParens();
  } else {
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
                              tok::l_square, tok::period, tok::l_paren) ||
           (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
      if (FormatTok->is(tok::l_square))
        parseSquare();
      else if (FormatTok->is(tok::l_paren))
        parseParens();
      else
        nextToken();
    }
  }
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_NamespaceLBrace);

    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    // The body is indented for NI_All, and for NI_Inner when the declaration
    // scope stack has more than one entry.
    unsigned AddLevels =
        Style.NamespaceIndentation == FormatStyle::NI_All ||
                (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
                 DeclarationScopeStack.size() > 1)
            ? 1u
            : 0u;
    bool ManageWhitesmithsBraces =
        AddLevels == 0u &&
        Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

    // If we're in Whitesmiths mode, indent the brace if we're not indenting
    // the whole block.
    if (ManageWhitesmithsBraces)
      ++Line->Level;

    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
               /*KeepBraces=*/true, /*IfKind=*/nullptr,
               ManageWhitesmithsBraces);

    addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);

    if (ManageWhitesmithsBraces)
      --Line->Level;
  }
  // FIXME: Add error handling.
}

/// Parses what follows a `new` keyword. Only C# and Java get special
/// treatment; for all other languages just the keyword is consumed.
void UnwrappedLineParser::parseNew() {
  assert(FormatTok->is(tok::kw_new) && "'new' expected");
  nextToken();

  if (Style.isCSharp()) {
    // Consume the expression up to (not including) the next `;` or `,`.
    do {
      // Handle constructor invocation, e.g. `new(field: value)`.
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // Handle array initialization syntax, e.g. `new[] {10, 20, 30}`.
      if (FormatTok->is(tok::l_brace))
        parseBracedList();

      if (FormatTok->isOneOf(tok::semi, tok::comma))
        return;

      nextToken();
    } while (!eof());
  }

  if (Style.Language != FormatStyle::LK_Java)
    return;

  // In Java, we can parse everything up to the parens, which aren't optional.
  do {
    // There should not be a ;, { or } before the new's open paren.
    if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
      return;

    // Consume the parens.
    if (FormatTok->is(tok::l_paren)) {
      parseParens();

      // If there is a class body of an anonymous class, consume that as child.
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      return;
    }
    nextToken();
  } while (!eof());
}

/// Parses the body of a loop, either a block or a single unbraced statement.
///
/// \param KeepBraces if false, the block's braces are marked optional unless
/// the statement is nested too deep.
/// \param WrapRightBrace whether to end the unwrapped line after a braced
/// body.
void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
  keepAncestorBraces();

  if (isBlockBegin(*FormatTok)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    FormatToken *LeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepBraces);
    setPreviousRBraceType(TT_ControlStatementRBrace);
    if (!KeepBraces) {
      assert(!NestedTooDeep.empty());
      if (!NestedTooDeep.back())
        markOptionalBraces(LeftBrace);
    }
    if (WrapRightBrace)
      addUnwrappedLine();
  } else {
    parseUnbracedBody();
  }

  // NOTE(review): keepAncestorBraces() pushes onto NestedTooDeep whenever
  // Style.RemoveBracesLLVM is set, whereas this pop is keyed on !KeepBraces.
  // Callers that pass KeepBraces == true with brace removal enabled (e.g.
  // parseDoWhile()) therefore appear to leave their entry on the stack -
  // confirm whether that is intended.
  if (!KeepBraces)
    NestedTooDeep.pop_back();
}

/// Parses a `for` or `while` loop, a foreach macro, or one of the Verilog
/// constructs listed in the assertion below.
///
/// \param HasParens whether a parenthesized header may follow the keyword.
void UnwrappedLineParser::parseForOrWhileLoop(bool HasParens) {
  assert((FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) ||
          (Style.isVerilog() &&
           FormatTok->isOneOf(Keywords.kw_always, Keywords.kw_always_comb,
                              Keywords.kw_always_ff, Keywords.kw_always_latch,
                              Keywords.kw_final, Keywords.kw_initial,
                              Keywords.kw_foreach, Keywords.kw_forever,
                              Keywords.kw_repeat))) &&
         "'for', 'while' or foreach macro expected");
  // Braces may only be removed from plain `for` / `while` loops.
  const bool KeepBraces = !Style.RemoveBracesLLVM ||
                          !FormatTok->isOneOf(tok::kw_for, tok::kw_while);

  nextToken();
  // JS' for await ( ...
  if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
    nextToken();
  if (IsCpp && FormatTok->is(tok::kw_co_await))
    nextToken();
  if (HasParens && FormatTok->is(tok::l_paren)) {
    // The type is only set for Verilog basically because we were afraid to
    // change the existing behavior for loops. See the discussion on D121756 for
    // details.
    if (Style.isVerilog())
      FormatTok->setFinalizedType(TT_ConditionLParen);
    parseParens();
  }

  if (Style.isVerilog()) {
    // Event control.
    parseVerilogSensitivityList();
  } else if (Style.AllowShortLoopsOnASingleLine && FormatTok->is(tok::semi) &&
             Tokens->getPreviousToken()->is(tok::r_paren)) {
    // Loop with an empty body (`while (x);`): keep the `;` on the same line.
    nextToken();
    addUnwrappedLine();
    return;
  }

  handleAttributes();
  parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
}

// (The definition below continues past the end of this span.)
void UnwrappedLineParser::parseDoWhile() {
  assert(FormatTok->is(tok::kw_do) && "'do' expected");
  nextToken();

  parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);

  // FIXME: Add error handling.
  if (FormatTok->isNot(tok::kw_while)) {
    addUnwrappedLine();
    return;
  }

  FormatTok->setFinalizedType(TT_DoWhile);

  // If in Whitesmiths mode, the line with the while() needs to be indented
  // to the same level as the block.
3242 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) 3243 ++Line->Level; 3244 3245 nextToken(); 3246 parseStructuralElement(); 3247 } 3248 3249 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) { 3250 nextToken(); 3251 unsigned OldLineLevel = Line->Level; 3252 3253 if (LeftAlignLabel) 3254 Line->Level = 0; 3255 else if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 3256 --Line->Level; 3257 3258 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() && 3259 FormatTok->is(tok::l_brace)) { 3260 3261 CompoundStatementIndenter Indenter(this, Line->Level, 3262 Style.BraceWrapping.AfterCaseLabel, 3263 Style.BraceWrapping.IndentBraces); 3264 parseBlock(); 3265 if (FormatTok->is(tok::kw_break)) { 3266 if (Style.BraceWrapping.AfterControlStatement == 3267 FormatStyle::BWACS_Always) { 3268 addUnwrappedLine(); 3269 if (!Style.IndentCaseBlocks && 3270 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) { 3271 ++Line->Level; 3272 } 3273 } 3274 parseStructuralElement(); 3275 } 3276 addUnwrappedLine(); 3277 } else { 3278 if (FormatTok->is(tok::semi)) 3279 nextToken(); 3280 addUnwrappedLine(); 3281 } 3282 Line->Level = OldLineLevel; 3283 if (FormatTok->isNot(tok::l_brace)) { 3284 parseStructuralElement(); 3285 addUnwrappedLine(); 3286 } 3287 } 3288 3289 void UnwrappedLineParser::parseCaseLabel() { 3290 assert(FormatTok->is(tok::kw_case) && "'case' expected"); 3291 auto *Case = FormatTok; 3292 3293 // FIXME: fix handling of complex expressions here. 
3294 do { 3295 nextToken(); 3296 if (FormatTok->is(tok::colon)) { 3297 FormatTok->setFinalizedType(TT_CaseLabelColon); 3298 break; 3299 } 3300 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::arrow)) { 3301 FormatTok->setFinalizedType(TT_CaseLabelArrow); 3302 Case->setFinalizedType(TT_SwitchExpressionLabel); 3303 break; 3304 } 3305 } while (!eof()); 3306 parseLabel(); 3307 } 3308 3309 void UnwrappedLineParser::parseSwitch(bool IsExpr) { 3310 assert(FormatTok->is(tok::kw_switch) && "'switch' expected"); 3311 nextToken(); 3312 if (FormatTok->is(tok::l_paren)) 3313 parseParens(); 3314 3315 keepAncestorBraces(); 3316 3317 if (FormatTok->is(tok::l_brace)) { 3318 CompoundStatementIndenter Indenter(this, Style, Line->Level); 3319 FormatTok->setFinalizedType(IsExpr ? TT_SwitchExpressionLBrace 3320 : TT_ControlStatementLBrace); 3321 if (IsExpr) 3322 parseChildBlock(); 3323 else 3324 parseBlock(); 3325 setPreviousRBraceType(TT_ControlStatementRBrace); 3326 if (!IsExpr) 3327 addUnwrappedLine(); 3328 } else { 3329 addUnwrappedLine(); 3330 ++Line->Level; 3331 parseStructuralElement(); 3332 --Line->Level; 3333 } 3334 3335 if (Style.RemoveBracesLLVM) 3336 NestedTooDeep.pop_back(); 3337 } 3338 3339 // Operators that can follow a C variable. 
3340 static bool isCOperatorFollowingVar(tok::TokenKind Kind) { 3341 switch (Kind) { 3342 case tok::ampamp: 3343 case tok::ampequal: 3344 case tok::arrow: 3345 case tok::caret: 3346 case tok::caretequal: 3347 case tok::comma: 3348 case tok::ellipsis: 3349 case tok::equal: 3350 case tok::equalequal: 3351 case tok::exclaim: 3352 case tok::exclaimequal: 3353 case tok::greater: 3354 case tok::greaterequal: 3355 case tok::greatergreater: 3356 case tok::greatergreaterequal: 3357 case tok::l_paren: 3358 case tok::l_square: 3359 case tok::less: 3360 case tok::lessequal: 3361 case tok::lessless: 3362 case tok::lesslessequal: 3363 case tok::minus: 3364 case tok::minusequal: 3365 case tok::minusminus: 3366 case tok::percent: 3367 case tok::percentequal: 3368 case tok::period: 3369 case tok::pipe: 3370 case tok::pipeequal: 3371 case tok::pipepipe: 3372 case tok::plus: 3373 case tok::plusequal: 3374 case tok::plusplus: 3375 case tok::question: 3376 case tok::r_brace: 3377 case tok::r_paren: 3378 case tok::r_square: 3379 case tok::semi: 3380 case tok::slash: 3381 case tok::slashequal: 3382 case tok::star: 3383 case tok::starequal: 3384 return true; 3385 default: 3386 return false; 3387 } 3388 } 3389 3390 void UnwrappedLineParser::parseAccessSpecifier() { 3391 FormatToken *AccessSpecifierCandidate = FormatTok; 3392 nextToken(); 3393 // Understand Qt's slots. 3394 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 3395 nextToken(); 3396 // Otherwise, we don't know what it is, and we'd better keep the next token. 3397 if (FormatTok->is(tok::colon)) { 3398 nextToken(); 3399 addUnwrappedLine(); 3400 } else if (FormatTok->isNot(tok::coloncolon) && 3401 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) { 3402 // Not a variable name nor namespace name. 3403 addUnwrappedLine(); 3404 } else if (AccessSpecifierCandidate) { 3405 // Consider the access specifier to be a C identifier. 
3406 AccessSpecifierCandidate->Tok.setKind(tok::identifier); 3407 } 3408 } 3409 3410 /// \brief Parses a requires, decides if it is a clause or an expression. 3411 /// \pre The current token has to be the requires keyword. 3412 /// \returns true if it parsed a clause. 3413 bool UnwrappedLineParser::parseRequires() { 3414 assert(FormatTok->is(tok::kw_requires) && "'requires' expected"); 3415 auto RequiresToken = FormatTok; 3416 3417 // We try to guess if it is a requires clause, or a requires expression. For 3418 // that we first consume the keyword and check the next token. 3419 nextToken(); 3420 3421 switch (FormatTok->Tok.getKind()) { 3422 case tok::l_brace: 3423 // This can only be an expression, never a clause. 3424 parseRequiresExpression(RequiresToken); 3425 return false; 3426 case tok::l_paren: 3427 // Clauses and expression can start with a paren, it's unclear what we have. 3428 break; 3429 default: 3430 // All other tokens can only be a clause. 3431 parseRequiresClause(RequiresToken); 3432 return true; 3433 } 3434 3435 // Looking forward we would have to decide if there are function declaration 3436 // like arguments to the requires expression: 3437 // requires (T t) { 3438 // Or there is a constraint expression for the requires clause: 3439 // requires (C<T> && ... 3440 3441 // But first let's look behind. 3442 auto *PreviousNonComment = RequiresToken->getPreviousNonComment(); 3443 3444 if (!PreviousNonComment || 3445 PreviousNonComment->is(TT_RequiresExpressionLBrace)) { 3446 // If there is no token, or an expression left brace, we are a requires 3447 // clause within a requires expression. 3448 parseRequiresClause(RequiresToken); 3449 return true; 3450 } 3451 3452 switch (PreviousNonComment->Tok.getKind()) { 3453 case tok::greater: 3454 case tok::r_paren: 3455 case tok::kw_noexcept: 3456 case tok::kw_const: 3457 // This is a requires clause. 
3458 parseRequiresClause(RequiresToken); 3459 return true; 3460 case tok::amp: 3461 case tok::ampamp: { 3462 // This can be either: 3463 // if (... && requires (T t) ...) 3464 // Or 3465 // void member(...) && requires (C<T> ... 3466 // We check the one token before that for a const: 3467 // void member(...) const && requires (C<T> ... 3468 auto PrevPrev = PreviousNonComment->getPreviousNonComment(); 3469 if (PrevPrev && PrevPrev->is(tok::kw_const)) { 3470 parseRequiresClause(RequiresToken); 3471 return true; 3472 } 3473 break; 3474 } 3475 default: 3476 if (PreviousNonComment->isTypeOrIdentifier(LangOpts)) { 3477 // This is a requires clause. 3478 parseRequiresClause(RequiresToken); 3479 return true; 3480 } 3481 // It's an expression. 3482 parseRequiresExpression(RequiresToken); 3483 return false; 3484 } 3485 3486 // Now we look forward and try to check if the paren content is a parameter 3487 // list. The parameters can be cv-qualified and contain references or 3488 // pointers. 3489 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds 3490 // of stuff: typename, const, *, &, &&, ::, identifiers. 3491 3492 unsigned StoredPosition = Tokens->getPosition(); 3493 FormatToken *NextToken = Tokens->getNextToken(); 3494 int Lookahead = 0; 3495 auto PeekNext = [&Lookahead, &NextToken, this] { 3496 ++Lookahead; 3497 NextToken = Tokens->getNextToken(); 3498 }; 3499 3500 bool FoundType = false; 3501 bool LastWasColonColon = false; 3502 int OpenAngles = 0; 3503 3504 for (; Lookahead < 50; PeekNext()) { 3505 switch (NextToken->Tok.getKind()) { 3506 case tok::kw_volatile: 3507 case tok::kw_const: 3508 case tok::comma: 3509 if (OpenAngles == 0) { 3510 FormatTok = Tokens->setPosition(StoredPosition); 3511 parseRequiresExpression(RequiresToken); 3512 return false; 3513 } 3514 break; 3515 case tok::eof: 3516 // Break out of the loop. 
3517 Lookahead = 50; 3518 break; 3519 case tok::coloncolon: 3520 LastWasColonColon = true; 3521 break; 3522 case tok::kw_decltype: 3523 case tok::identifier: 3524 if (FoundType && !LastWasColonColon && OpenAngles == 0) { 3525 FormatTok = Tokens->setPosition(StoredPosition); 3526 parseRequiresExpression(RequiresToken); 3527 return false; 3528 } 3529 FoundType = true; 3530 LastWasColonColon = false; 3531 break; 3532 case tok::less: 3533 ++OpenAngles; 3534 break; 3535 case tok::greater: 3536 --OpenAngles; 3537 break; 3538 default: 3539 if (NextToken->isTypeName(LangOpts)) { 3540 FormatTok = Tokens->setPosition(StoredPosition); 3541 parseRequiresExpression(RequiresToken); 3542 return false; 3543 } 3544 break; 3545 } 3546 } 3547 // This seems to be a complicated expression, just assume it's a clause. 3548 FormatTok = Tokens->setPosition(StoredPosition); 3549 parseRequiresClause(RequiresToken); 3550 return true; 3551 } 3552 3553 /// \brief Parses a requires clause. 3554 /// \param RequiresToken The requires keyword token, which starts this clause. 3555 /// \pre We need to be on the next token after the requires keyword. 3556 /// \sa parseRequiresExpression 3557 /// 3558 /// Returns if it either has finished parsing the clause, or it detects, that 3559 /// the clause is incorrect. 3560 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) { 3561 assert(FormatTok->getPreviousNonComment() == RequiresToken); 3562 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected"); 3563 3564 // If there is no previous token, we are within a requires expression, 3565 // otherwise we will always have the template or function declaration in front 3566 // of it. 3567 bool InRequiresExpression = 3568 !RequiresToken->Previous || 3569 RequiresToken->Previous->is(TT_RequiresExpressionLBrace); 3570 3571 RequiresToken->setFinalizedType(InRequiresExpression 3572 ? 
/// \brief Parses a constraint expression.
///
/// This is the body of a requires clause. It returns, when the parsing is
/// complete, or the expression is incorrect.
///
/// The loop consumes one syntactic element per iteration and returns as soon
/// as it reaches a token that cannot continue the constraint. Two flags carry
/// context from one iteration to the next: whether a lambda introducer and
/// whether a top-level parenthesized group are acceptable at this position.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;

  // Within lambda declarations, it is permitted to put a requires clause after
  // its template parameter list, which would place the requires clause right
  // before the parentheses of the parameters of the lambda declaration. Thus,
  // we track if we expect to see grouping parentheses at all.
  // Without this check, `requires foo<T> (T t)` in the below example would be
  // seen as the whole requires clause, accidentally eating the parameters of
  // the lambda.
  // [&]<typename T> requires foo<T> (T t) { ... };
  bool TopLevelParensAllowed = true;

  do {
    // Take the permission computed by the previous iteration and reset it;
    // cases below re-enable it where a lambda may follow.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires inside a constraint is parsed as an expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      if (!TopLevelParensAllowed)
        return;
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      // A second parenthesized group directly after this one is not accepted.
      TopLevelParensAllowed = false;
      break;

    case tok::l_square:
      // Stop unless a lambda is allowed here and actually parses as one.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      // After a binary operator a new operand starts, so both a lambda and a
      // parenthesized group are acceptable again.
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Commas and comments pass the current lambda permission through
      // unchanged.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
      LambdaNextTimeAllowed = true;
      TopLevelParensAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      TopLevelParensAllowed = false;
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      nextToken();
      // A named cast has to continue with its angle-bracketed target type.
      if (FormatTok->isNot(tok::less))
        return;

      nextToken();
      parseBracedList(/*IsAngleBracket=*/true);
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression. (binary)
      case tok::exclaim:     // The same as above, but unary.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        // After any other token the identifier is taken to start whatever
        // follows the constraint, so the constraint ends here.
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*IsAngleBracket=*/true);
      }
      TopLevelParensAllowed = false;
      break;
    }
  } while (!eof());
}
3789 while (FormatTok->is(tok::l_square)) 3790 parseSquare(); 3791 } else { 3792 nextToken(); 3793 } 3794 // We can have macros or attributes in between 'enum' and the enum name. 3795 if (FormatTok->is(tok::l_paren)) 3796 parseParens(); 3797 if (FormatTok->is(tok::identifier)) { 3798 nextToken(); 3799 // If there are two identifiers in a row, this is likely an elaborate 3800 // return type. In Java, this can be "implements", etc. 3801 if (IsCpp && FormatTok->is(tok::identifier)) 3802 return false; 3803 } 3804 } 3805 3806 // Just a declaration or something is wrong. 3807 if (FormatTok->isNot(tok::l_brace)) 3808 return true; 3809 FormatTok->setFinalizedType(TT_EnumLBrace); 3810 FormatTok->setBlockKind(BK_Block); 3811 3812 if (Style.Language == FormatStyle::LK_Java) { 3813 // Java enums are different. 3814 parseJavaEnumBody(); 3815 return true; 3816 } 3817 if (Style.Language == FormatStyle::LK_Proto) { 3818 parseBlock(/*MustBeDeclaration=*/true); 3819 return true; 3820 } 3821 3822 if (!Style.AllowShortEnumsOnASingleLine && 3823 ShouldBreakBeforeBrace(Style, InitialToken)) { 3824 addUnwrappedLine(); 3825 } 3826 // Parse enum body. 3827 nextToken(); 3828 if (!Style.AllowShortEnumsOnASingleLine) { 3829 addUnwrappedLine(); 3830 Line->Level += 1; 3831 } 3832 bool HasError = !parseBracedList(/*IsAngleBracket=*/false, /*IsEnum=*/true); 3833 if (!Style.AllowShortEnumsOnASingleLine) 3834 Line->Level -= 1; 3835 if (HasError) { 3836 if (FormatTok->is(tok::semi)) 3837 nextToken(); 3838 addUnwrappedLine(); 3839 } 3840 setPreviousRBraceType(TT_EnumRBrace); 3841 return true; 3842 3843 // There is no addUnwrappedLine() here so that we fall through to parsing a 3844 // structural element afterwards. Thus, in "enum A {} n, m;", 3845 // "} n, m;" will end up in one unwrapped line. 
3846 } 3847 3848 bool UnwrappedLineParser::parseStructLike() { 3849 // parseRecord falls through and does not yet add an unwrapped line as a 3850 // record declaration or definition can start a structural element. 3851 parseRecord(); 3852 // This does not apply to Java, JavaScript and C#. 3853 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() || 3854 Style.isCSharp()) { 3855 if (FormatTok->is(tok::semi)) 3856 nextToken(); 3857 addUnwrappedLine(); 3858 return true; 3859 } 3860 return false; 3861 } 3862 3863 namespace { 3864 // A class used to set and restore the Token position when peeking 3865 // ahead in the token source. 3866 class ScopedTokenPosition { 3867 unsigned StoredPosition; 3868 FormatTokenSource *Tokens; 3869 3870 public: 3871 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) { 3872 assert(Tokens && "Tokens expected to not be null"); 3873 StoredPosition = Tokens->getPosition(); 3874 } 3875 3876 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); } 3877 }; 3878 } // namespace 3879 3880 // Look to see if we have [[ by looking ahead, if 3881 // its not then rewind to the original position. 3882 bool UnwrappedLineParser::tryToParseSimpleAttribute() { 3883 ScopedTokenPosition AutoPosition(Tokens); 3884 FormatToken *Tok = Tokens->getNextToken(); 3885 // We already read the first [ check for the second. 3886 if (Tok->isNot(tok::l_square)) 3887 return false; 3888 // Double check that the attribute is just something 3889 // fairly simple. 
3890 while (Tok->isNot(tok::eof)) { 3891 if (Tok->is(tok::r_square)) 3892 break; 3893 Tok = Tokens->getNextToken(); 3894 } 3895 if (Tok->is(tok::eof)) 3896 return false; 3897 Tok = Tokens->getNextToken(); 3898 if (Tok->isNot(tok::r_square)) 3899 return false; 3900 Tok = Tokens->getNextToken(); 3901 if (Tok->is(tok::semi)) 3902 return false; 3903 return true; 3904 } 3905 3906 void UnwrappedLineParser::parseJavaEnumBody() { 3907 assert(FormatTok->is(tok::l_brace)); 3908 const FormatToken *OpeningBrace = FormatTok; 3909 3910 // Determine whether the enum is simple, i.e. does not have a semicolon or 3911 // constants with class bodies. Simple enums can be formatted like braced 3912 // lists, contracted to a single line, etc. 3913 unsigned StoredPosition = Tokens->getPosition(); 3914 bool IsSimple = true; 3915 FormatToken *Tok = Tokens->getNextToken(); 3916 while (Tok->isNot(tok::eof)) { 3917 if (Tok->is(tok::r_brace)) 3918 break; 3919 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 3920 IsSimple = false; 3921 break; 3922 } 3923 // FIXME: This will also mark enums with braces in the arguments to enum 3924 // constants as "not simple". This is probably fine in practice, though. 3925 Tok = Tokens->getNextToken(); 3926 } 3927 FormatTok = Tokens->setPosition(StoredPosition); 3928 3929 if (IsSimple) { 3930 nextToken(); 3931 parseBracedList(); 3932 addUnwrappedLine(); 3933 return; 3934 } 3935 3936 // Parse the body of a more complex enum. 3937 // First add a line for everything up to the "{". 3938 nextToken(); 3939 addUnwrappedLine(); 3940 ++Line->Level; 3941 3942 // Parse the enum constants. 3943 while (!eof()) { 3944 if (FormatTok->is(tok::l_brace)) { 3945 // Parse the constant's class body. 
3946 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u, 3947 /*MunchSemi=*/false); 3948 } else if (FormatTok->is(tok::l_paren)) { 3949 parseParens(); 3950 } else if (FormatTok->is(tok::comma)) { 3951 nextToken(); 3952 addUnwrappedLine(); 3953 } else if (FormatTok->is(tok::semi)) { 3954 nextToken(); 3955 addUnwrappedLine(); 3956 break; 3957 } else if (FormatTok->is(tok::r_brace)) { 3958 addUnwrappedLine(); 3959 break; 3960 } else { 3961 nextToken(); 3962 } 3963 } 3964 3965 // Parse the class body after the enum's ";" if any. 3966 parseLevel(OpeningBrace); 3967 nextToken(); 3968 --Line->Level; 3969 addUnwrappedLine(); 3970 } 3971 3972 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { 3973 const FormatToken &InitialToken = *FormatTok; 3974 nextToken(); 3975 3976 const FormatToken *ClassName = nullptr; 3977 bool IsDerived = false; 3978 auto IsNonMacroIdentifier = [](const FormatToken *Tok) { 3979 return Tok->is(tok::identifier) && Tok->TokenText != Tok->TokenText.upper(); 3980 }; 3981 // The actual identifier can be a nested name specifier, and in macros 3982 // it is often token-pasted. 3983 // An [[attribute]] can be before the identifier. 
3984 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 3985 tok::kw_alignas, tok::l_square) || 3986 FormatTok->isAttribute() || 3987 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) && 3988 FormatTok->isOneOf(tok::period, tok::comma))) { 3989 if (Style.isJavaScript() && 3990 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { 3991 // JavaScript/TypeScript supports inline object types in 3992 // extends/implements positions: 3993 // class Foo implements {bar: number} { } 3994 nextToken(); 3995 if (FormatTok->is(tok::l_brace)) { 3996 tryToParseBracedList(); 3997 continue; 3998 } 3999 } 4000 if (FormatTok->is(tok::l_square) && handleCppAttributes()) 4001 continue; 4002 const auto *Previous = FormatTok; 4003 nextToken(); 4004 switch (FormatTok->Tok.getKind()) { 4005 case tok::l_paren: 4006 // We can have macros in between 'class' and the class name. 4007 if (!IsNonMacroIdentifier(Previous) || 4008 // e.g. `struct macro(a) S { int i; };` 4009 Previous->Previous == &InitialToken) { 4010 parseParens(); 4011 } 4012 break; 4013 case tok::coloncolon: 4014 break; 4015 default: 4016 if (!ClassName && Previous->is(tok::identifier) && 4017 Previous->isNot(TT_AttributeMacro)) { 4018 ClassName = Previous; 4019 } 4020 } 4021 } 4022 4023 auto IsListInitialization = [&] { 4024 if (!ClassName || IsDerived) 4025 return false; 4026 assert(FormatTok->is(tok::l_brace)); 4027 const auto *Prev = FormatTok->getPreviousNonComment(); 4028 assert(Prev); 4029 return Prev != ClassName && Prev->is(tok::identifier) && 4030 Prev->isNot(Keywords.kw_final) && tryToParseBracedList(); 4031 }; 4032 4033 if (FormatTok->isOneOf(tok::colon, tok::less)) { 4034 int AngleNestingLevel = 0; 4035 do { 4036 if (FormatTok->is(tok::less)) 4037 ++AngleNestingLevel; 4038 else if (FormatTok->is(tok::greater)) 4039 --AngleNestingLevel; 4040 4041 if (AngleNestingLevel == 0) { 4042 if (FormatTok->is(tok::colon)) { 4043 IsDerived = true; 4044 } else if 
(FormatTok->is(tok::identifier) && 4045 FormatTok->Previous->is(tok::coloncolon)) { 4046 ClassName = FormatTok; 4047 } else if (FormatTok->is(tok::l_paren) && 4048 IsNonMacroIdentifier(FormatTok->Previous)) { 4049 break; 4050 } 4051 } 4052 if (FormatTok->is(tok::l_brace)) { 4053 if (AngleNestingLevel == 0 && IsListInitialization()) 4054 return; 4055 calculateBraceTypes(/*ExpectClassBody=*/true); 4056 if (!tryToParseBracedList()) 4057 break; 4058 } 4059 if (FormatTok->is(tok::l_square)) { 4060 FormatToken *Previous = FormatTok->Previous; 4061 if (!Previous || (Previous->isNot(tok::r_paren) && 4062 !Previous->isTypeOrIdentifier(LangOpts))) { 4063 // Don't try parsing a lambda if we had a closing parenthesis before, 4064 // it was probably a pointer to an array: int (*)[]. 4065 if (!tryToParseLambda()) 4066 continue; 4067 } else { 4068 parseSquare(); 4069 continue; 4070 } 4071 } 4072 if (FormatTok->is(tok::semi)) 4073 return; 4074 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) { 4075 addUnwrappedLine(); 4076 nextToken(); 4077 parseCSharpGenericTypeConstraint(); 4078 break; 4079 } 4080 nextToken(); 4081 } while (!eof()); 4082 } 4083 4084 auto GetBraceTypes = 4085 [](const FormatToken &RecordTok) -> std::pair<TokenType, TokenType> { 4086 switch (RecordTok.Tok.getKind()) { 4087 case tok::kw_class: 4088 return {TT_ClassLBrace, TT_ClassRBrace}; 4089 case tok::kw_struct: 4090 return {TT_StructLBrace, TT_StructRBrace}; 4091 case tok::kw_union: 4092 return {TT_UnionLBrace, TT_UnionRBrace}; 4093 default: 4094 // Useful for e.g. interface. 
4095 return {TT_RecordLBrace, TT_RecordRBrace}; 4096 } 4097 }; 4098 if (FormatTok->is(tok::l_brace)) { 4099 if (IsListInitialization()) 4100 return; 4101 auto [OpenBraceType, ClosingBraceType] = GetBraceTypes(InitialToken); 4102 FormatTok->setFinalizedType(OpenBraceType); 4103 if (ParseAsExpr) { 4104 parseChildBlock(); 4105 } else { 4106 if (ShouldBreakBeforeBrace(Style, InitialToken)) 4107 addUnwrappedLine(); 4108 4109 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u; 4110 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false); 4111 } 4112 setPreviousRBraceType(ClosingBraceType); 4113 } 4114 // There is no addUnwrappedLine() here so that we fall through to parsing a 4115 // structural element afterwards. Thus, in "class A {} n, m;", 4116 // "} n, m;" will end up in one unwrapped line. 4117 } 4118 4119 void UnwrappedLineParser::parseObjCMethod() { 4120 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) && 4121 "'(' or identifier expected."); 4122 do { 4123 if (FormatTok->is(tok::semi)) { 4124 nextToken(); 4125 addUnwrappedLine(); 4126 return; 4127 } else if (FormatTok->is(tok::l_brace)) { 4128 if (Style.BraceWrapping.AfterFunction) 4129 addUnwrappedLine(); 4130 parseBlock(); 4131 addUnwrappedLine(); 4132 return; 4133 } else { 4134 nextToken(); 4135 } 4136 } while (!eof()); 4137 } 4138 4139 void UnwrappedLineParser::parseObjCProtocolList() { 4140 assert(FormatTok->is(tok::less) && "'<' expected."); 4141 do { 4142 nextToken(); 4143 // Early exit in case someone forgot a close angle. 4144 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4145 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4146 return; 4147 } 4148 } while (!eof() && FormatTok->isNot(tok::greater)); 4149 nextToken(); // Skip '>'. 
4150 } 4151 4152 void UnwrappedLineParser::parseObjCUntilAtEnd() { 4153 do { 4154 if (FormatTok->isObjCAtKeyword(tok::objc_end)) { 4155 nextToken(); 4156 addUnwrappedLine(); 4157 break; 4158 } 4159 if (FormatTok->is(tok::l_brace)) { 4160 parseBlock(); 4161 // In ObjC interfaces, nothing should be following the "}". 4162 addUnwrappedLine(); 4163 } else if (FormatTok->is(tok::r_brace)) { 4164 // Ignore stray "}". parseStructuralElement doesn't consume them. 4165 nextToken(); 4166 addUnwrappedLine(); 4167 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { 4168 nextToken(); 4169 parseObjCMethod(); 4170 } else { 4171 parseStructuralElement(); 4172 } 4173 } while (!eof()); 4174 } 4175 4176 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 4177 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || 4178 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); 4179 nextToken(); 4180 nextToken(); // interface name 4181 4182 // @interface can be followed by a lightweight generic 4183 // specialization list, then either a base class or a category. 4184 if (FormatTok->is(tok::less)) 4185 parseObjCLightweightGenerics(); 4186 if (FormatTok->is(tok::colon)) { 4187 nextToken(); 4188 nextToken(); // base class name 4189 // The base class can also have lightweight generics applied to it. 4190 if (FormatTok->is(tok::less)) 4191 parseObjCLightweightGenerics(); 4192 } else if (FormatTok->is(tok::l_paren)) { 4193 // Skip category, if present. 4194 parseParens(); 4195 } 4196 4197 if (FormatTok->is(tok::less)) 4198 parseObjCProtocolList(); 4199 4200 if (FormatTok->is(tok::l_brace)) { 4201 if (Style.BraceWrapping.AfterObjCDeclaration) 4202 addUnwrappedLine(); 4203 parseBlock(/*MustBeDeclaration=*/true); 4204 } 4205 4206 // With instance variables, this puts '}' on its own line. Without instance 4207 // variables, this ends the @interface line. 
4208 addUnwrappedLine(); 4209 4210 parseObjCUntilAtEnd(); 4211 } 4212 4213 void UnwrappedLineParser::parseObjCLightweightGenerics() { 4214 assert(FormatTok->is(tok::less)); 4215 // Unlike protocol lists, generic parameterizations support 4216 // nested angles: 4217 // 4218 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : 4219 // NSObject <NSCopying, NSSecureCoding> 4220 // 4221 // so we need to count how many open angles we have left. 4222 unsigned NumOpenAngles = 1; 4223 do { 4224 nextToken(); 4225 // Early exit in case someone forgot a close angle. 4226 if (FormatTok->isOneOf(tok::semi, tok::l_brace) || 4227 FormatTok->isObjCAtKeyword(tok::objc_end)) { 4228 break; 4229 } 4230 if (FormatTok->is(tok::less)) { 4231 ++NumOpenAngles; 4232 } else if (FormatTok->is(tok::greater)) { 4233 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); 4234 --NumOpenAngles; 4235 } 4236 } while (!eof() && NumOpenAngles != 0); 4237 nextToken(); // Skip '>'. 4238 } 4239 4240 // Returns true for the declaration/definition form of @protocol, 4241 // false for the expression form. 4242 bool UnwrappedLineParser::parseObjCProtocol() { 4243 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); 4244 nextToken(); 4245 4246 if (FormatTok->is(tok::l_paren)) { 4247 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". 4248 return false; 4249 } 4250 4251 // The definition/declaration form, 4252 // @protocol Foo 4253 // - (int)someMethod; 4254 // @end 4255 4256 nextToken(); // protocol name 4257 4258 if (FormatTok->is(tok::less)) 4259 parseObjCProtocolList(); 4260 4261 // Check for protocol declaration. 
4262 if (FormatTok->is(tok::semi)) { 4263 nextToken(); 4264 addUnwrappedLine(); 4265 return true; 4266 } 4267 4268 addUnwrappedLine(); 4269 parseObjCUntilAtEnd(); 4270 return true; 4271 } 4272 4273 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 4274 bool IsImport = FormatTok->is(Keywords.kw_import); 4275 assert(IsImport || FormatTok->is(tok::kw_export)); 4276 nextToken(); 4277 4278 // Consume the "default" in "export default class/function". 4279 if (FormatTok->is(tok::kw_default)) 4280 nextToken(); 4281 4282 // Consume "async function", "function" and "default function", so that these 4283 // get parsed as free-standing JS functions, i.e. do not require a trailing 4284 // semicolon. 4285 if (FormatTok->is(Keywords.kw_async)) 4286 nextToken(); 4287 if (FormatTok->is(Keywords.kw_function)) { 4288 nextToken(); 4289 return; 4290 } 4291 4292 // For imports, `export *`, `export {...}`, consume the rest of the line up 4293 // to the terminating `;`. For everything else, just return and continue 4294 // parsing the structural element, i.e. the declaration or expression for 4295 // `export default`. 4296 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && 4297 !FormatTok->isStringLiteral() && 4298 !(FormatTok->is(Keywords.kw_type) && 4299 Tokens->peekNextToken()->isOneOf(tok::l_brace, tok::star))) { 4300 return; 4301 } 4302 4303 while (!eof()) { 4304 if (FormatTok->is(tok::semi)) 4305 return; 4306 if (Line->Tokens.empty()) { 4307 // Common issue: Automatic Semicolon Insertion wrapped the line, so the 4308 // import statement should terminate. 
4309 return; 4310 } 4311 if (FormatTok->is(tok::l_brace)) { 4312 FormatTok->setBlockKind(BK_Block); 4313 nextToken(); 4314 parseBracedList(); 4315 } else { 4316 nextToken(); 4317 } 4318 } 4319 } 4320 4321 void UnwrappedLineParser::parseStatementMacro() { 4322 nextToken(); 4323 if (FormatTok->is(tok::l_paren)) 4324 parseParens(); 4325 if (FormatTok->is(tok::semi)) 4326 nextToken(); 4327 addUnwrappedLine(); 4328 } 4329 4330 void UnwrappedLineParser::parseVerilogHierarchyIdentifier() { 4331 // consume things like a::`b.c[d:e] or a::* 4332 while (true) { 4333 if (FormatTok->isOneOf(tok::star, tok::period, tok::periodstar, 4334 tok::coloncolon, tok::hash) || 4335 Keywords.isVerilogIdentifier(*FormatTok)) { 4336 nextToken(); 4337 } else if (FormatTok->is(tok::l_square)) { 4338 parseSquare(); 4339 } else { 4340 break; 4341 } 4342 } 4343 } 4344 4345 void UnwrappedLineParser::parseVerilogSensitivityList() { 4346 if (FormatTok->isNot(tok::at)) 4347 return; 4348 nextToken(); 4349 // A block event expression has 2 at signs. 
4350 if (FormatTok->is(tok::at)) 4351 nextToken(); 4352 switch (FormatTok->Tok.getKind()) { 4353 case tok::star: 4354 nextToken(); 4355 break; 4356 case tok::l_paren: 4357 parseParens(); 4358 break; 4359 default: 4360 parseVerilogHierarchyIdentifier(); 4361 break; 4362 } 4363 } 4364 4365 unsigned UnwrappedLineParser::parseVerilogHierarchyHeader() { 4366 unsigned AddLevels = 0; 4367 4368 if (FormatTok->is(Keywords.kw_clocking)) { 4369 nextToken(); 4370 if (Keywords.isVerilogIdentifier(*FormatTok)) 4371 nextToken(); 4372 parseVerilogSensitivityList(); 4373 if (FormatTok->is(tok::semi)) 4374 nextToken(); 4375 } else if (FormatTok->isOneOf(tok::kw_case, Keywords.kw_casex, 4376 Keywords.kw_casez, Keywords.kw_randcase, 4377 Keywords.kw_randsequence)) { 4378 if (Style.IndentCaseLabels) 4379 AddLevels++; 4380 nextToken(); 4381 if (FormatTok->is(tok::l_paren)) { 4382 FormatTok->setFinalizedType(TT_ConditionLParen); 4383 parseParens(); 4384 } 4385 if (FormatTok->isOneOf(Keywords.kw_inside, Keywords.kw_matches)) 4386 nextToken(); 4387 // The case header has no semicolon. 4388 } else { 4389 // "module" etc. 
4390 nextToken(); 4391 // all the words like the name of the module and specifiers like 4392 // "automatic" and the width of function return type 4393 while (true) { 4394 if (FormatTok->is(tok::l_square)) { 4395 auto Prev = FormatTok->getPreviousNonComment(); 4396 if (Prev && Keywords.isVerilogIdentifier(*Prev)) 4397 Prev->setFinalizedType(TT_VerilogDimensionedTypeName); 4398 parseSquare(); 4399 } else if (Keywords.isVerilogIdentifier(*FormatTok) || 4400 FormatTok->isOneOf(Keywords.kw_automatic, tok::kw_static)) { 4401 nextToken(); 4402 } else { 4403 break; 4404 } 4405 } 4406 4407 auto NewLine = [this]() { 4408 addUnwrappedLine(); 4409 Line->IsContinuation = true; 4410 }; 4411 4412 // package imports 4413 while (FormatTok->is(Keywords.kw_import)) { 4414 NewLine(); 4415 nextToken(); 4416 parseVerilogHierarchyIdentifier(); 4417 if (FormatTok->is(tok::semi)) 4418 nextToken(); 4419 } 4420 4421 // parameters and ports 4422 if (FormatTok->is(Keywords.kw_verilogHash)) { 4423 NewLine(); 4424 nextToken(); 4425 if (FormatTok->is(tok::l_paren)) { 4426 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4427 parseParens(); 4428 } 4429 } 4430 if (FormatTok->is(tok::l_paren)) { 4431 NewLine(); 4432 FormatTok->setFinalizedType(TT_VerilogMultiLineListLParen); 4433 parseParens(); 4434 } 4435 4436 // extends and implements 4437 if (FormatTok->is(Keywords.kw_extends)) { 4438 NewLine(); 4439 nextToken(); 4440 parseVerilogHierarchyIdentifier(); 4441 if (FormatTok->is(tok::l_paren)) 4442 parseParens(); 4443 } 4444 if (FormatTok->is(Keywords.kw_implements)) { 4445 NewLine(); 4446 do { 4447 nextToken(); 4448 parseVerilogHierarchyIdentifier(); 4449 } while (FormatTok->is(tok::comma)); 4450 } 4451 4452 // Coverage event for cover groups. 
4453 if (FormatTok->is(tok::at)) { 4454 NewLine(); 4455 parseVerilogSensitivityList(); 4456 } 4457 4458 if (FormatTok->is(tok::semi)) 4459 nextToken(/*LevelDifference=*/1); 4460 addUnwrappedLine(); 4461 } 4462 4463 return AddLevels; 4464 } 4465 4466 void UnwrappedLineParser::parseVerilogTable() { 4467 assert(FormatTok->is(Keywords.kw_table)); 4468 nextToken(/*LevelDifference=*/1); 4469 addUnwrappedLine(); 4470 4471 auto InitialLevel = Line->Level++; 4472 while (!eof() && !Keywords.isVerilogEnd(*FormatTok)) { 4473 FormatToken *Tok = FormatTok; 4474 nextToken(); 4475 if (Tok->is(tok::semi)) 4476 addUnwrappedLine(); 4477 else if (Tok->isOneOf(tok::star, tok::colon, tok::question, tok::minus)) 4478 Tok->setFinalizedType(TT_VerilogTableItem); 4479 } 4480 Line->Level = InitialLevel; 4481 nextToken(/*LevelDifference=*/-1); 4482 addUnwrappedLine(); 4483 } 4484 4485 void UnwrappedLineParser::parseVerilogCaseLabel() { 4486 // The label will get unindented in AnnotatingParser. If there are no leading 4487 // spaces, indent the rest here so that things inside the block will be 4488 // indented relative to things outside. We don't use parseLabel because we 4489 // don't know whether this colon is a label or a ternary expression at this 4490 // point. 4491 auto OrigLevel = Line->Level; 4492 auto FirstLine = CurrentLines->size(); 4493 if (Line->Level == 0 || (Line->InPPDirective && Line->Level <= 1)) 4494 ++Line->Level; 4495 else if (!Style.IndentCaseBlocks && Keywords.isVerilogBegin(*FormatTok)) 4496 --Line->Level; 4497 parseStructuralElement(); 4498 // Restore the indentation in both the new line and the line that has the 4499 // label. 
4500 if (CurrentLines->size() > FirstLine) 4501 (*CurrentLines)[FirstLine].Level = OrigLevel; 4502 Line->Level = OrigLevel; 4503 } 4504 4505 bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { 4506 for (const auto &N : Line.Tokens) { 4507 if (N.Tok->MacroCtx) 4508 return true; 4509 for (const UnwrappedLine &Child : N.Children) 4510 if (containsExpansion(Child)) 4511 return true; 4512 } 4513 return false; 4514 } 4515 4516 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { 4517 if (Line->Tokens.empty()) 4518 return; 4519 LLVM_DEBUG({ 4520 if (!parsingPPDirective()) { 4521 llvm::dbgs() << "Adding unwrapped line:\n"; 4522 printDebugInfo(*Line); 4523 } 4524 }); 4525 4526 // If this line closes a block when in Whitesmiths mode, remember that 4527 // information so that the level can be decreased after the line is added. 4528 // This has to happen after the addition of the line since the line itself 4529 // needs to be indented. 4530 bool ClosesWhitesmithsBlock = 4531 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && 4532 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; 4533 4534 // If the current line was expanded from a macro call, we use it to 4535 // reconstruct an unwrapped line from the structure of the expanded unwrapped 4536 // line and the unexpanded token stream. 4537 if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { 4538 if (!Reconstruct) 4539 Reconstruct.emplace(Line->Level, Unexpanded); 4540 Reconstruct->addLine(*Line); 4541 4542 // While the reconstructed unexpanded lines are stored in the normal 4543 // flow of lines, the expanded lines are stored on the side to be analyzed 4544 // in an extra step. 
4545 CurrentExpandedLines.push_back(std::move(*Line)); 4546 4547 if (Reconstruct->finished()) { 4548 UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); 4549 assert(!Reconstructed.Tokens.empty() && 4550 "Reconstructed must at least contain the macro identifier."); 4551 assert(!parsingPPDirective()); 4552 LLVM_DEBUG({ 4553 llvm::dbgs() << "Adding unexpanded line:\n"; 4554 printDebugInfo(Reconstructed); 4555 }); 4556 ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; 4557 Lines.push_back(std::move(Reconstructed)); 4558 CurrentExpandedLines.clear(); 4559 Reconstruct.reset(); 4560 } 4561 } else { 4562 // At the top level we only get here when no unexpansion is going on, or 4563 // when conditional formatting led to unfinished macro reconstructions. 4564 assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); 4565 CurrentLines->push_back(std::move(*Line)); 4566 } 4567 Line->Tokens.clear(); 4568 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; 4569 Line->FirstStartColumn = 0; 4570 Line->IsContinuation = false; 4571 Line->SeenDecltypeAuto = false; 4572 4573 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) 4574 --Line->Level; 4575 if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { 4576 CurrentLines->append( 4577 std::make_move_iterator(PreprocessorDirectives.begin()), 4578 std::make_move_iterator(PreprocessorDirectives.end())); 4579 PreprocessorDirectives.clear(); 4580 } 4581 // Disconnect the current token from the last token on the previous line. 4582 FormatTok->Previous = nullptr; 4583 } 4584 4585 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); } 4586 4587 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 4588 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 4589 FormatTok.NewlinesBefore > 0; 4590 } 4591 4592 // Checks if \p FormatTok is a line comment that continues the line comment 4593 // section on \p Line. 
4594 static bool 4595 continuesLineCommentSection(const FormatToken &FormatTok, 4596 const UnwrappedLine &Line, 4597 const llvm::Regex &CommentPragmasRegex) { 4598 if (Line.Tokens.empty()) 4599 return false; 4600 4601 StringRef IndentContent = FormatTok.TokenText; 4602 if (FormatTok.TokenText.starts_with("//") || 4603 FormatTok.TokenText.starts_with("/*")) { 4604 IndentContent = FormatTok.TokenText.substr(2); 4605 } 4606 if (CommentPragmasRegex.match(IndentContent)) 4607 return false; 4608 4609 // If Line starts with a line comment, then FormatTok continues the comment 4610 // section if its original column is greater or equal to the original start 4611 // column of the line. 4612 // 4613 // Define the min column token of a line as follows: if a line ends in '{' or 4614 // contains a '{' followed by a line comment, then the min column token is 4615 // that '{'. Otherwise, the min column token of the line is the first token of 4616 // the line. 4617 // 4618 // If Line starts with a token other than a line comment, then FormatTok 4619 // continues the comment section if its original column is greater than the 4620 // original start column of the min column token of the line. 
4621 // 4622 // For example, the second line comment continues the first in these cases: 4623 // 4624 // // first line 4625 // // second line 4626 // 4627 // and: 4628 // 4629 // // first line 4630 // // second line 4631 // 4632 // and: 4633 // 4634 // int i; // first line 4635 // // second line 4636 // 4637 // and: 4638 // 4639 // do { // first line 4640 // // second line 4641 // int i; 4642 // } while (true); 4643 // 4644 // and: 4645 // 4646 // enum { 4647 // a, // first line 4648 // // second line 4649 // b 4650 // }; 4651 // 4652 // The second line comment doesn't continue the first in these cases: 4653 // 4654 // // first line 4655 // // second line 4656 // 4657 // and: 4658 // 4659 // int i; // first line 4660 // // second line 4661 // 4662 // and: 4663 // 4664 // do { // first line 4665 // // second line 4666 // int i; 4667 // } while (true); 4668 // 4669 // and: 4670 // 4671 // enum { 4672 // a, // first line 4673 // // second line 4674 // }; 4675 const FormatToken *MinColumnToken = Line.Tokens.front().Tok; 4676 4677 // Scan for '{//'. If found, use the column of '{' as a min column for line 4678 // comment section continuation. 4679 const FormatToken *PreviousToken = nullptr; 4680 for (const UnwrappedLineNode &Node : Line.Tokens) { 4681 if (PreviousToken && PreviousToken->is(tok::l_brace) && 4682 isLineComment(*Node.Tok)) { 4683 MinColumnToken = PreviousToken; 4684 break; 4685 } 4686 PreviousToken = Node.Tok; 4687 4688 // Grab the last newline preceding a token in this unwrapped line. 
4689 if (Node.Tok->NewlinesBefore > 0) 4690 MinColumnToken = Node.Tok; 4691 } 4692 if (PreviousToken && PreviousToken->is(tok::l_brace)) 4693 MinColumnToken = PreviousToken; 4694 4695 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, 4696 MinColumnToken); 4697 } 4698 4699 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 4700 bool JustComments = Line->Tokens.empty(); 4701 for (FormatToken *Tok : CommentsBeforeNextToken) { 4702 // Line comments that belong to the same line comment section are put on the 4703 // same line since later we might want to reflow content between them. 4704 // Additional fine-grained breaking of line comment sections is controlled 4705 // by the class BreakableLineCommentSection in case it is desirable to keep 4706 // several line comment sections in the same unwrapped line. 4707 // 4708 // FIXME: Consider putting separate line comment sections as children to the 4709 // unwrapped line instead. 4710 Tok->ContinuesLineCommentSection = 4711 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex); 4712 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection) 4713 addUnwrappedLine(); 4714 pushToken(Tok); 4715 } 4716 if (NewlineBeforeNext && JustComments) 4717 addUnwrappedLine(); 4718 CommentsBeforeNextToken.clear(); 4719 } 4720 4721 void UnwrappedLineParser::nextToken(int LevelDifference) { 4722 if (eof()) 4723 return; 4724 flushComments(isOnNewLine(*FormatTok)); 4725 pushToken(FormatTok); 4726 FormatToken *Previous = FormatTok; 4727 if (!Style.isJavaScript()) 4728 readToken(LevelDifference); 4729 else 4730 readTokenWithJavaScriptASI(); 4731 FormatTok->Previous = Previous; 4732 if (Style.isVerilog()) { 4733 // Blocks in Verilog can have `begin` and `end` instead of braces. For 4734 // keywords like `begin`, we can't treat them the same as left braces 4735 // because some contexts require one of them. 
For example structs use 4736 // braces and if blocks use keywords, and a left brace can occur in an if 4737 // statement, but it is not a block. For keywords like `end`, we simply 4738 // treat them the same as right braces. 4739 if (Keywords.isVerilogEnd(*FormatTok)) 4740 FormatTok->Tok.setKind(tok::r_brace); 4741 } 4742 } 4743 4744 void UnwrappedLineParser::distributeComments( 4745 const SmallVectorImpl<FormatToken *> &Comments, 4746 const FormatToken *NextTok) { 4747 // Whether or not a line comment token continues a line is controlled by 4748 // the method continuesLineCommentSection, with the following caveat: 4749 // 4750 // Define a trail of Comments to be a nonempty proper postfix of Comments such 4751 // that each comment line from the trail is aligned with the next token, if 4752 // the next token exists. If a trail exists, the beginning of the maximal 4753 // trail is marked as a start of a new comment section. 4754 // 4755 // For example in this code: 4756 // 4757 // int a; // line about a 4758 // // line 1 about b 4759 // // line 2 about b 4760 // int b; 4761 // 4762 // the two lines about b form a maximal trail, so there are two sections, the 4763 // first one consisting of the single comment "// line about a" and the 4764 // second one consisting of the next two comments. 4765 if (Comments.empty()) 4766 return; 4767 bool ShouldPushCommentsInCurrentLine = true; 4768 bool HasTrailAlignedWithNextToken = false; 4769 unsigned StartOfTrailAlignedWithNextToken = 0; 4770 if (NextTok) { 4771 // We are skipping the first element intentionally. 
4772 for (unsigned i = Comments.size() - 1; i > 0; --i) { 4773 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { 4774 HasTrailAlignedWithNextToken = true; 4775 StartOfTrailAlignedWithNextToken = i; 4776 } 4777 } 4778 } 4779 for (unsigned i = 0, e = Comments.size(); i < e; ++i) { 4780 FormatToken *FormatTok = Comments[i]; 4781 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { 4782 FormatTok->ContinuesLineCommentSection = false; 4783 } else { 4784 FormatTok->ContinuesLineCommentSection = 4785 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); 4786 } 4787 if (!FormatTok->ContinuesLineCommentSection && 4788 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { 4789 ShouldPushCommentsInCurrentLine = false; 4790 } 4791 if (ShouldPushCommentsInCurrentLine) 4792 pushToken(FormatTok); 4793 else 4794 CommentsBeforeNextToken.push_back(FormatTok); 4795 } 4796 } 4797 4798 void UnwrappedLineParser::readToken(int LevelDifference) { 4799 SmallVector<FormatToken *, 1> Comments; 4800 bool PreviousWasComment = false; 4801 bool FirstNonCommentOnLine = false; 4802 do { 4803 FormatTok = Tokens->getNextToken(); 4804 assert(FormatTok); 4805 while (FormatTok->isOneOf(TT_ConflictStart, TT_ConflictEnd, 4806 TT_ConflictAlternative)) { 4807 if (FormatTok->is(TT_ConflictStart)) 4808 conditionalCompilationStart(/*Unreachable=*/false); 4809 else if (FormatTok->is(TT_ConflictAlternative)) 4810 conditionalCompilationAlternative(); 4811 else if (FormatTok->is(TT_ConflictEnd)) 4812 conditionalCompilationEnd(); 4813 FormatTok = Tokens->getNextToken(); 4814 FormatTok->MustBreakBefore = true; 4815 FormatTok->MustBreakBeforeFinalized = true; 4816 } 4817 4818 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine, 4819 const FormatToken &Tok, 4820 bool PreviousWasComment) { 4821 auto IsFirstOnLine = [](const FormatToken &Tok) { 4822 return Tok.HasUnescapedNewline || Tok.IsFirst; 4823 }; 4824 4825 // Consider preprocessor directives preceded by 
block comments as first 4826 // on line. 4827 if (PreviousWasComment) 4828 return FirstNonCommentOnLine || IsFirstOnLine(Tok); 4829 return IsFirstOnLine(Tok); 4830 }; 4831 4832 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4833 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4834 PreviousWasComment = FormatTok->is(tok::comment); 4835 4836 while (!Line->InPPDirective && FormatTok->is(tok::hash) && 4837 (!Style.isVerilog() || 4838 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) && 4839 FirstNonCommentOnLine) { 4840 distributeComments(Comments, FormatTok); 4841 Comments.clear(); 4842 // If there is an unfinished unwrapped line, we flush the preprocessor 4843 // directives only after that unwrapped line was finished later. 4844 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 4845 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 4846 assert((LevelDifference >= 0 || 4847 static_cast<unsigned>(-LevelDifference) <= Line->Level) && 4848 "LevelDifference makes Line->Level negative"); 4849 Line->Level += LevelDifference; 4850 // Comments stored before the preprocessor directive need to be output 4851 // before the preprocessor directive, at the same level as the 4852 // preprocessor directive, as we consider them to apply to the directive. 
4853 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash && 4854 PPBranchLevel > 0) { 4855 Line->Level += PPBranchLevel; 4856 } 4857 assert(Line->Level >= Line->UnbracedBodyLevel); 4858 Line->Level -= Line->UnbracedBodyLevel; 4859 flushComments(isOnNewLine(*FormatTok)); 4860 parsePPDirective(); 4861 PreviousWasComment = FormatTok->is(tok::comment); 4862 FirstNonCommentOnLine = IsFirstNonCommentOnLine( 4863 FirstNonCommentOnLine, *FormatTok, PreviousWasComment); 4864 } 4865 4866 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && 4867 !Line->InPPDirective) { 4868 continue; 4869 } 4870 4871 if (FormatTok->is(tok::identifier) && 4872 Macros.defined(FormatTok->TokenText) && 4873 // FIXME: Allow expanding macros in preprocessor directives. 4874 !Line->InPPDirective) { 4875 FormatToken *ID = FormatTok; 4876 unsigned Position = Tokens->getPosition(); 4877 4878 // To correctly parse the code, we need to replace the tokens of the macro 4879 // call with its expansion. 4880 auto PreCall = std::move(Line); 4881 Line.reset(new UnwrappedLine); 4882 bool OldInExpansion = InExpansion; 4883 InExpansion = true; 4884 // We parse the macro call into a new line. 4885 auto Args = parseMacroCall(); 4886 InExpansion = OldInExpansion; 4887 assert(Line->Tokens.front().Tok == ID); 4888 // And remember the unexpanded macro call tokens. 4889 auto UnexpandedLine = std::move(Line); 4890 // Reset to the old line. 
4891 Line = std::move(PreCall); 4892 4893 LLVM_DEBUG({ 4894 llvm::dbgs() << "Macro call: " << ID->TokenText << "("; 4895 if (Args) { 4896 llvm::dbgs() << "("; 4897 for (const auto &Arg : Args.value()) 4898 for (const auto &T : Arg) 4899 llvm::dbgs() << T->TokenText << " "; 4900 llvm::dbgs() << ")"; 4901 } 4902 llvm::dbgs() << "\n"; 4903 }); 4904 if (Macros.objectLike(ID->TokenText) && Args && 4905 !Macros.hasArity(ID->TokenText, Args->size())) { 4906 // The macro is either 4907 // - object-like, but we got argumnets, or 4908 // - overloaded to be both object-like and function-like, but none of 4909 // the function-like arities match the number of arguments. 4910 // Thus, expand as object-like macro. 4911 LLVM_DEBUG(llvm::dbgs() 4912 << "Macro \"" << ID->TokenText 4913 << "\" not overloaded for arity " << Args->size() 4914 << "or not function-like, using object-like overload."); 4915 Args.reset(); 4916 UnexpandedLine->Tokens.resize(1); 4917 Tokens->setPosition(Position); 4918 nextToken(); 4919 assert(!Args && Macros.objectLike(ID->TokenText)); 4920 } 4921 if ((!Args && Macros.objectLike(ID->TokenText)) || 4922 (Args && Macros.hasArity(ID->TokenText, Args->size()))) { 4923 // Next, we insert the expanded tokens in the token stream at the 4924 // current position, and continue parsing. 
4925 Unexpanded[ID] = std::move(UnexpandedLine); 4926 SmallVector<FormatToken *, 8> Expansion = 4927 Macros.expand(ID, std::move(Args)); 4928 if (!Expansion.empty()) 4929 FormatTok = Tokens->insertTokens(Expansion); 4930 4931 LLVM_DEBUG({ 4932 llvm::dbgs() << "Expanded: "; 4933 for (const auto &T : Expansion) 4934 llvm::dbgs() << T->TokenText << " "; 4935 llvm::dbgs() << "\n"; 4936 }); 4937 } else { 4938 LLVM_DEBUG({ 4939 llvm::dbgs() << "Did not expand macro \"" << ID->TokenText 4940 << "\", because it was used "; 4941 if (Args) 4942 llvm::dbgs() << "with " << Args->size(); 4943 else 4944 llvm::dbgs() << "without"; 4945 llvm::dbgs() << " arguments, which doesn't match any definition.\n"; 4946 }); 4947 Tokens->setPosition(Position); 4948 FormatTok = ID; 4949 } 4950 } 4951 4952 if (FormatTok->isNot(tok::comment)) { 4953 distributeComments(Comments, FormatTok); 4954 Comments.clear(); 4955 return; 4956 } 4957 4958 Comments.push_back(FormatTok); 4959 } while (!eof()); 4960 4961 distributeComments(Comments, nullptr); 4962 Comments.clear(); 4963 } 4964 4965 namespace { 4966 template <typename Iterator> 4967 void pushTokens(Iterator Begin, Iterator End, 4968 llvm::SmallVectorImpl<FormatToken *> &Into) { 4969 for (auto I = Begin; I != End; ++I) { 4970 Into.push_back(I->Tok); 4971 for (const auto &Child : I->Children) 4972 pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); 4973 } 4974 } 4975 } // namespace 4976 4977 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> 4978 UnwrappedLineParser::parseMacroCall() { 4979 std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; 4980 assert(Line->Tokens.empty()); 4981 nextToken(); 4982 if (FormatTok->isNot(tok::l_paren)) 4983 return Args; 4984 unsigned Position = Tokens->getPosition(); 4985 FormatToken *Tok = FormatTok; 4986 nextToken(); 4987 Args.emplace(); 4988 auto ArgStart = std::prev(Line->Tokens.end()); 4989 4990 int Parens = 0; 4991 do { 4992 switch 
(FormatTok->Tok.getKind()) { 4993 case tok::l_paren: 4994 ++Parens; 4995 nextToken(); 4996 break; 4997 case tok::r_paren: { 4998 if (Parens > 0) { 4999 --Parens; 5000 nextToken(); 5001 break; 5002 } 5003 Args->push_back({}); 5004 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5005 nextToken(); 5006 return Args; 5007 } 5008 case tok::comma: { 5009 if (Parens > 0) { 5010 nextToken(); 5011 break; 5012 } 5013 Args->push_back({}); 5014 pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); 5015 nextToken(); 5016 ArgStart = std::prev(Line->Tokens.end()); 5017 break; 5018 } 5019 default: 5020 nextToken(); 5021 break; 5022 } 5023 } while (!eof()); 5024 Line->Tokens.resize(1); 5025 Tokens->setPosition(Position); 5026 FormatTok = Tok; 5027 return {}; 5028 } 5029 5030 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 5031 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 5032 if (MustBreakBeforeNextToken) { 5033 Line->Tokens.back().Tok->MustBreakBefore = true; 5034 Line->Tokens.back().Tok->MustBreakBeforeFinalized = true; 5035 MustBreakBeforeNextToken = false; 5036 } 5037 } 5038 5039 } // end namespace format 5040 } // end namespace clang 5041