1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20
21 #include <algorithm>
22
23 #define DEBUG_TYPE "format-parser"
24
25 namespace clang {
26 namespace format {
27
28 class FormatTokenSource {
29 public:
~FormatTokenSource()30 virtual ~FormatTokenSource() {}
31 virtual FormatToken *getNextToken() = 0;
32
33 virtual unsigned getPosition() = 0;
34 virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36
37 namespace {
38
39 class ScopedDeclarationState {
40 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)41 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42 bool MustBeDeclaration)
43 : Line(Line), Stack(Stack) {
44 Line.MustBeDeclaration = MustBeDeclaration;
45 Stack.push_back(MustBeDeclaration);
46 }
~ScopedDeclarationState()47 ~ScopedDeclarationState() {
48 Stack.pop_back();
49 if (!Stack.empty())
50 Line.MustBeDeclaration = Stack.back();
51 else
52 Line.MustBeDeclaration = true;
53 }
54
55 private:
56 UnwrappedLine &Line;
57 std::vector<bool> &Stack;
58 };
59
isLineComment(const FormatToken & FormatTok)60 static bool isLineComment(const FormatToken &FormatTok) {
61 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
continuesLineComment(const FormatToken & FormatTok,const FormatToken * Previous,const FormatToken * MinColumnToken)67 static bool continuesLineComment(const FormatToken &FormatTok,
68 const FormatToken *Previous,
69 const FormatToken *MinColumnToken) {
70 if (!Previous || !MinColumnToken)
71 return false;
72 unsigned MinContinueColumn =
73 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75 isLineComment(*Previous) &&
76 FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78
79 class ScopedMacroState : public FormatTokenSource {
80 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)81 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82 FormatToken *&ResetToken)
83 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85 Token(nullptr), PreviousToken(nullptr) {
86 FakeEOF.Tok.startToken();
87 FakeEOF.Tok.setKind(tok::eof);
88 TokenSource = this;
89 Line.Level = 0;
90 Line.InPPDirective = true;
91 }
92
~ScopedMacroState()93 ~ScopedMacroState() override {
94 TokenSource = PreviousTokenSource;
95 ResetToken = Token;
96 Line.InPPDirective = false;
97 Line.Level = PreviousLineLevel;
98 }
99
getNextToken()100 FormatToken *getNextToken() override {
101 // The \c UnwrappedLineParser guards against this by never calling
102 // \c getNextToken() after it has encountered the first eof token.
103 assert(!eof());
104 PreviousToken = Token;
105 Token = PreviousTokenSource->getNextToken();
106 if (eof())
107 return &FakeEOF;
108 return Token;
109 }
110
getPosition()111 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112
setPosition(unsigned Position)113 FormatToken *setPosition(unsigned Position) override {
114 PreviousToken = nullptr;
115 Token = PreviousTokenSource->setPosition(Position);
116 return Token;
117 }
118
119 private:
eof()120 bool eof() {
121 return Token && Token->HasUnescapedNewline &&
122 !continuesLineComment(*Token, PreviousToken,
123 /*MinColumnToken=*/PreviousToken);
124 }
125
126 FormatToken FakeEOF;
127 UnwrappedLine &Line;
128 FormatTokenSource *&TokenSource;
129 FormatToken *&ResetToken;
130 unsigned PreviousLineLevel;
131 FormatTokenSource *PreviousTokenSource;
132
133 FormatToken *Token;
134 FormatToken *PreviousToken;
135 };
136
137 } // end anonymous namespace
138
139 class ScopedLineState {
140 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)141 ScopedLineState(UnwrappedLineParser &Parser,
142 bool SwitchToPreprocessorLines = false)
143 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144 if (SwitchToPreprocessorLines)
145 Parser.CurrentLines = &Parser.PreprocessorDirectives;
146 else if (!Parser.Line->Tokens.empty())
147 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148 PreBlockLine = std::move(Parser.Line);
149 Parser.Line = std::make_unique<UnwrappedLine>();
150 Parser.Line->Level = PreBlockLine->Level;
151 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152 }
153
~ScopedLineState()154 ~ScopedLineState() {
155 if (!Parser.Line->Tokens.empty()) {
156 Parser.addUnwrappedLine();
157 }
158 assert(Parser.Line->Tokens.empty());
159 Parser.Line = std::move(PreBlockLine);
160 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161 Parser.MustBreakBeforeNextToken = true;
162 Parser.CurrentLines = OriginalLines;
163 }
164
165 private:
166 UnwrappedLineParser &Parser;
167
168 std::unique_ptr<UnwrappedLine> PreBlockLine;
169 SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171
172 class CompoundStatementIndenter {
173 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)174 CompoundStatementIndenter(UnwrappedLineParser *Parser,
175 const FormatStyle &Style, unsigned &LineLevel)
176 : CompoundStatementIndenter(Parser, LineLevel,
177 Style.BraceWrapping.AfterControlStatement,
178 Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)179 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180 bool WrapBrace, bool IndentBrace)
181 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182 if (WrapBrace)
183 Parser->addUnwrappedLine();
184 if (IndentBrace)
185 ++LineLevel;
186 }
~CompoundStatementIndenter()187 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188
189 private:
190 unsigned &LineLevel;
191 unsigned OldLineLevel;
192 };
193
194 namespace {
195
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)198 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199 : Tokens(Tokens), Position(-1) {}
200
getNextToken()201 FormatToken *getNextToken() override {
202 ++Position;
203 return Tokens[Position];
204 }
205
getPosition()206 unsigned getPosition() override {
207 assert(Position >= 0);
208 return Position;
209 }
210
setPosition(unsigned P)211 FormatToken *setPosition(unsigned P) override {
212 Position = P;
213 return Tokens[Position];
214 }
215
reset()216 void reset() { Position = -1; }
217
218 private:
219 ArrayRef<FormatToken *> Tokens;
220 int Position;
221 };
222
223 } // end anonymous namespace
224
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226 const AdditionalKeywords &Keywords,
227 unsigned FirstStartColumn,
228 ArrayRef<FormatToken *> Tokens,
229 UnwrappedLineConsumer &Callback)
230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235 ? IG_Rejected
236 : IG_Inited),
237 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238
reset()239 void UnwrappedLineParser::reset() {
240 PPBranchLevel = -1;
241 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242 ? IG_Rejected
243 : IG_Inited;
244 IncludeGuardToken = nullptr;
245 Line.reset(new UnwrappedLine);
246 CommentsBeforeNextToken.clear();
247 FormatTok = nullptr;
248 MustBreakBeforeNextToken = false;
249 PreprocessorDirectives.clear();
250 CurrentLines = &Lines;
251 DeclarationScopeStack.clear();
252 PPStack.clear();
253 Line->FirstStartColumn = FirstStartColumn;
254 }
255
parse()256 void UnwrappedLineParser::parse() {
257 IndexedTokenSource TokenSource(AllTokens);
258 Line->FirstStartColumn = FirstStartColumn;
259 do {
260 LLVM_DEBUG(llvm::dbgs() << "----\n");
261 reset();
262 Tokens = &TokenSource;
263 TokenSource.reset();
264
265 readToken();
266 parseFile();
267
268 // If we found an include guard then all preprocessor directives (other than
269 // the guard) are over-indented by one.
270 if (IncludeGuard == IG_Found)
271 for (auto &Line : Lines)
272 if (Line.InPPDirective && Line.Level > 0)
273 --Line.Level;
274
275 // Create line with eof token.
276 pushToken(FormatTok);
277 addUnwrappedLine();
278
279 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280 E = Lines.end();
281 I != E; ++I) {
282 Callback.consumeUnwrappedLine(*I);
283 }
284 Callback.finishRun();
285 Lines.clear();
286 while (!PPLevelBranchIndex.empty() &&
287 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290 }
291 if (!PPLevelBranchIndex.empty()) {
292 ++PPLevelBranchIndex.back();
293 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295 }
296 } while (!PPLevelBranchIndex.empty());
297 }
298
parseFile()299 void UnwrappedLineParser::parseFile() {
300 // The top-level context in a file always has declarations, except for pre-
301 // processor directives and JavaScript files.
302 bool MustBeDeclaration =
303 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305 MustBeDeclaration);
306 if (Style.Language == FormatStyle::LK_TextProto)
307 parseBracedList();
308 else
309 parseLevel(/*HasOpeningBrace=*/false);
310 // Make sure to format the remaining tokens.
311 //
312 // LK_TextProto is special since its top-level is parsed as the body of a
313 // braced list, which does not necessarily have natural line separators such
314 // as a semicolon. Comments after the last entry that have been determined to
315 // not belong to that line, as in:
316 // key: value
317 // // endfile comment
318 // do not have a chance to be put on a line of their own until this point.
319 // Here we add this newline before end-of-file comments.
320 if (Style.Language == FormatStyle::LK_TextProto &&
321 !CommentsBeforeNextToken.empty())
322 addUnwrappedLine();
323 flushComments(true);
324 addUnwrappedLine();
325 }
326
parseCSharpGenericTypeConstraint()327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328 do {
329 switch (FormatTok->Tok.getKind()) {
330 case tok::l_brace:
331 return;
332 default:
333 if (FormatTok->is(Keywords.kw_where)) {
334 addUnwrappedLine();
335 nextToken();
336 parseCSharpGenericTypeConstraint();
337 break;
338 }
339 nextToken();
340 break;
341 }
342 } while (!eof());
343 }
344
parseCSharpAttribute()345 void UnwrappedLineParser::parseCSharpAttribute() {
346 int UnpairedSquareBrackets = 1;
347 do {
348 switch (FormatTok->Tok.getKind()) {
349 case tok::r_square:
350 nextToken();
351 --UnpairedSquareBrackets;
352 if (UnpairedSquareBrackets == 0) {
353 addUnwrappedLine();
354 return;
355 }
356 break;
357 case tok::l_square:
358 ++UnpairedSquareBrackets;
359 nextToken();
360 break;
361 default:
362 nextToken();
363 break;
364 }
365 } while (!eof());
366 }
367
parseLevel(bool HasOpeningBrace)368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
369 bool SwitchLabelEncountered = false;
370 do {
371 tok::TokenKind kind = FormatTok->Tok.getKind();
372 if (FormatTok->getType() == TT_MacroBlockBegin) {
373 kind = tok::l_brace;
374 } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375 kind = tok::r_brace;
376 }
377
378 switch (kind) {
379 case tok::comment:
380 nextToken();
381 addUnwrappedLine();
382 break;
383 case tok::l_brace:
384 // FIXME: Add parameter whether this can happen - if this happens, we must
385 // be in a non-declaration context.
386 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387 continue;
388 parseBlock(/*MustBeDeclaration=*/false);
389 addUnwrappedLine();
390 break;
391 case tok::r_brace:
392 if (HasOpeningBrace)
393 return;
394 nextToken();
395 addUnwrappedLine();
396 break;
397 case tok::kw_default: {
398 unsigned StoredPosition = Tokens->getPosition();
399 FormatToken *Next;
400 do {
401 Next = Tokens->getNextToken();
402 } while (Next && Next->is(tok::comment));
403 FormatTok = Tokens->setPosition(StoredPosition);
404 if (Next && Next->isNot(tok::colon)) {
405 // default not followed by ':' is not a case label; treat it like
406 // an identifier.
407 parseStructuralElement();
408 break;
409 }
410 // Else, if it is 'default:', fall through to the case handling.
411 LLVM_FALLTHROUGH;
412 }
413 case tok::kw_case:
414 if (Style.Language == FormatStyle::LK_JavaScript &&
415 Line->MustBeDeclaration) {
416 // A 'case: string' style field declaration.
417 parseStructuralElement();
418 break;
419 }
420 if (!SwitchLabelEncountered &&
421 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422 ++Line->Level;
423 SwitchLabelEncountered = true;
424 parseStructuralElement();
425 break;
426 case tok::l_square:
427 if (Style.isCSharp()) {
428 nextToken();
429 parseCSharpAttribute();
430 break;
431 }
432 LLVM_FALLTHROUGH;
433 default:
434 parseStructuralElement();
435 break;
436 }
437 } while (!eof());
438 }
439
calculateBraceTypes(bool ExpectClassBody)440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441 // We'll parse forward through the tokens until we hit
442 // a closing brace or eof - note that getNextToken() will
443 // parse macros, so this will magically work inside macro
444 // definitions, too.
445 unsigned StoredPosition = Tokens->getPosition();
446 FormatToken *Tok = FormatTok;
447 const FormatToken *PrevTok = Tok->Previous;
448 // Keep a stack of positions of lbrace tokens. We will
449 // update information about whether an lbrace starts a
450 // braced init list or a different block during the loop.
451 SmallVector<FormatToken *, 8> LBraceStack;
452 assert(Tok->Tok.is(tok::l_brace));
453 do {
454 // Get next non-comment token.
455 FormatToken *NextTok;
456 unsigned ReadTokens = 0;
457 do {
458 NextTok = Tokens->getNextToken();
459 ++ReadTokens;
460 } while (NextTok->is(tok::comment));
461
462 switch (Tok->Tok.getKind()) {
463 case tok::l_brace:
464 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465 if (PrevTok->isOneOf(tok::colon, tok::less))
466 // A ':' indicates this code is in a type, or a braced list
467 // following a label in an object literal ({a: {b: 1}}).
468 // A '<' could be an object used in a comparison, but that is nonsense
469 // code (can never return true), so more likely it is a generic type
470 // argument (`X<{a: string; b: number}>`).
471 // The code below could be confused by semicolons between the
472 // individual members in a type member list, which would normally
473 // trigger BK_Block. In both cases, this must be parsed as an inline
474 // braced init.
475 Tok->setBlockKind(BK_BracedInit);
476 else if (PrevTok->is(tok::r_paren))
477 // `) { }` can only occur in function or method declarations in JS.
478 Tok->setBlockKind(BK_Block);
479 } else {
480 Tok->setBlockKind(BK_Unknown);
481 }
482 LBraceStack.push_back(Tok);
483 break;
484 case tok::r_brace:
485 if (LBraceStack.empty())
486 break;
487 if (LBraceStack.back()->is(BK_Unknown)) {
488 bool ProbablyBracedList = false;
489 if (Style.Language == FormatStyle::LK_Proto) {
490 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491 } else {
492 // Using OriginalColumn to distinguish between ObjC methods and
493 // binary operators is a bit hacky.
494 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495 NextTok->OriginalColumn == 0;
496
497 // If there is a comma, semicolon or right paren after the closing
498 // brace, we assume this is a braced initializer list. Note that
499 // regardless how we mark inner braces here, we will overwrite the
500 // BlockKind later if we parse a braced list (where all blocks
501 // inside are by default braced lists), or when we explicitly detect
502 // blocks (for example while parsing lambdas).
503 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504 // braced list in JS.
505 ProbablyBracedList =
506 (Style.Language == FormatStyle::LK_JavaScript &&
507 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508 Keywords.kw_as)) ||
509 (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511 tok::r_paren, tok::r_square, tok::l_brace,
512 tok::ellipsis) ||
513 (NextTok->is(tok::identifier) &&
514 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515 (NextTok->is(tok::semi) &&
516 (!ExpectClassBody || LBraceStack.size() != 1)) ||
517 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519 // We can have an array subscript after a braced init
520 // list, but C++11 attributes are expected after blocks.
521 NextTok = Tokens->getNextToken();
522 ++ReadTokens;
523 ProbablyBracedList = NextTok->isNot(tok::l_square);
524 }
525 }
526 if (ProbablyBracedList) {
527 Tok->setBlockKind(BK_BracedInit);
528 LBraceStack.back()->setBlockKind(BK_BracedInit);
529 } else {
530 Tok->setBlockKind(BK_Block);
531 LBraceStack.back()->setBlockKind(BK_Block);
532 }
533 }
534 LBraceStack.pop_back();
535 break;
536 case tok::identifier:
537 if (!Tok->is(TT_StatementMacro))
538 break;
539 LLVM_FALLTHROUGH;
540 case tok::at:
541 case tok::semi:
542 case tok::kw_if:
543 case tok::kw_while:
544 case tok::kw_for:
545 case tok::kw_switch:
546 case tok::kw_try:
547 case tok::kw___try:
548 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
549 LBraceStack.back()->setBlockKind(BK_Block);
550 break;
551 default:
552 break;
553 }
554 PrevTok = Tok;
555 Tok = NextTok;
556 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557
558 // Assume other blocks for all unclosed opening braces.
559 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560 if (LBraceStack[i]->is(BK_Unknown))
561 LBraceStack[i]->setBlockKind(BK_Block);
562 }
563
564 FormatTok = Tokens->setPosition(StoredPosition);
565 }
566
/// Mixes the \c std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mix = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mix;
}
572
computePPHash() const573 size_t UnwrappedLineParser::computePPHash() const {
574 size_t h = 0;
575 for (const auto &i : PPStack) {
576 hash_combine(h, size_t(i.Kind));
577 hash_combine(h, i.Line);
578 }
579 return h;
580 }
581
parseBlock(bool MustBeDeclaration,unsigned AddLevels,bool MunchSemi,bool UnindentWhitesmithsBraces)582 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
583 bool MunchSemi,
584 bool UnindentWhitesmithsBraces) {
585 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
586 "'{' or macro block token expected");
587 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
588 FormatTok->setBlockKind(BK_Block);
589
590 // For Whitesmiths mode, jump to the next level prior to skipping over the
591 // braces.
592 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
593 ++Line->Level;
594
595 size_t PPStartHash = computePPHash();
596
597 unsigned InitialLevel = Line->Level;
598 nextToken(/*LevelDifference=*/AddLevels);
599
600 if (MacroBlock && FormatTok->is(tok::l_paren))
601 parseParens();
602
603 size_t NbPreprocessorDirectives =
604 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
605 addUnwrappedLine();
606 size_t OpeningLineIndex =
607 CurrentLines->empty()
608 ? (UnwrappedLine::kInvalidIndex)
609 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
610
611 // Whitesmiths is weird here. The brace needs to be indented for the namespace
612 // block, but the block itself may not be indented depending on the style
613 // settings. This allows the format to back up one level in those cases.
614 if (UnindentWhitesmithsBraces)
615 --Line->Level;
616
617 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
618 MustBeDeclaration);
619 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
620 Line->Level += AddLevels;
621 parseLevel(/*HasOpeningBrace=*/true);
622
623 if (eof())
624 return;
625
626 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
627 : !FormatTok->is(tok::r_brace)) {
628 Line->Level = InitialLevel;
629 FormatTok->setBlockKind(BK_Block);
630 return;
631 }
632
633 size_t PPEndHash = computePPHash();
634
635 // Munch the closing brace.
636 nextToken(/*LevelDifference=*/-AddLevels);
637
638 if (MacroBlock && FormatTok->is(tok::l_paren))
639 parseParens();
640
641 if (FormatTok->is(tok::arrow)) {
642 // Following the } we can find a trailing return type arrow
643 // as part of an implicit conversion constraint.
644 nextToken();
645 parseStructuralElement();
646 }
647
648 if (MunchSemi && FormatTok->Tok.is(tok::semi))
649 nextToken();
650
651 Line->Level = InitialLevel;
652
653 if (PPStartHash == PPEndHash) {
654 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
655 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
656 // Update the opening line to add the forward reference as well
657 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
658 CurrentLines->size() - 1;
659 }
660 }
661 }
662
isGoogScope(const UnwrappedLine & Line)663 static bool isGoogScope(const UnwrappedLine &Line) {
664 // FIXME: Closure-library specific stuff should not be hard-coded but be
665 // configurable.
666 if (Line.Tokens.size() < 4)
667 return false;
668 auto I = Line.Tokens.begin();
669 if (I->Tok->TokenText != "goog")
670 return false;
671 ++I;
672 if (I->Tok->isNot(tok::period))
673 return false;
674 ++I;
675 if (I->Tok->TokenText != "scope")
676 return false;
677 ++I;
678 return I->Tok->is(tok::l_paren);
679 }
680
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)681 static bool isIIFE(const UnwrappedLine &Line,
682 const AdditionalKeywords &Keywords) {
683 // Look for the start of an immediately invoked anonymous function.
684 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
685 // This is commonly done in JavaScript to create a new, anonymous scope.
686 // Example: (function() { ... })()
687 if (Line.Tokens.size() < 3)
688 return false;
689 auto I = Line.Tokens.begin();
690 if (I->Tok->isNot(tok::l_paren))
691 return false;
692 ++I;
693 if (I->Tok->isNot(Keywords.kw_function))
694 return false;
695 ++I;
696 return I->Tok->is(tok::l_paren);
697 }
698
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)699 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
700 const FormatToken &InitialToken) {
701 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
702 return Style.BraceWrapping.AfterNamespace;
703 if (InitialToken.is(tok::kw_class))
704 return Style.BraceWrapping.AfterClass;
705 if (InitialToken.is(tok::kw_union))
706 return Style.BraceWrapping.AfterUnion;
707 if (InitialToken.is(tok::kw_struct))
708 return Style.BraceWrapping.AfterStruct;
709 return false;
710 }
711
parseChildBlock()712 void UnwrappedLineParser::parseChildBlock() {
713 FormatTok->setBlockKind(BK_Block);
714 nextToken();
715 {
716 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
717 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
718 ScopedLineState LineState(*this);
719 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
720 /*MustBeDeclaration=*/false);
721 Line->Level += SkipIndent ? 0 : 1;
722 parseLevel(/*HasOpeningBrace=*/true);
723 flushComments(isOnNewLine(*FormatTok));
724 Line->Level -= SkipIndent ? 0 : 1;
725 }
726 nextToken();
727 }
728
parsePPDirective()729 void UnwrappedLineParser::parsePPDirective() {
730 assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
731 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
732
733 nextToken();
734
735 if (!FormatTok->Tok.getIdentifierInfo()) {
736 parsePPUnknown();
737 return;
738 }
739
740 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
741 case tok::pp_define:
742 parsePPDefine();
743 return;
744 case tok::pp_if:
745 parsePPIf(/*IfDef=*/false);
746 break;
747 case tok::pp_ifdef:
748 case tok::pp_ifndef:
749 parsePPIf(/*IfDef=*/true);
750 break;
751 case tok::pp_else:
752 parsePPElse();
753 break;
754 case tok::pp_elif:
755 parsePPElIf();
756 break;
757 case tok::pp_endif:
758 parsePPEndIf();
759 break;
760 default:
761 parsePPUnknown();
762 break;
763 }
764 }
765
conditionalCompilationCondition(bool Unreachable)766 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
767 size_t Line = CurrentLines->size();
768 if (CurrentLines == &PreprocessorDirectives)
769 Line += Lines.size();
770
771 if (Unreachable ||
772 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
773 PPStack.push_back({PP_Unreachable, Line});
774 else
775 PPStack.push_back({PP_Conditional, Line});
776 }
777
conditionalCompilationStart(bool Unreachable)778 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
779 ++PPBranchLevel;
780 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
781 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
782 PPLevelBranchIndex.push_back(0);
783 PPLevelBranchCount.push_back(0);
784 }
785 PPChainBranchIndex.push(0);
786 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
787 conditionalCompilationCondition(Unreachable || Skip);
788 }
789
conditionalCompilationAlternative()790 void UnwrappedLineParser::conditionalCompilationAlternative() {
791 if (!PPStack.empty())
792 PPStack.pop_back();
793 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
794 if (!PPChainBranchIndex.empty())
795 ++PPChainBranchIndex.top();
796 conditionalCompilationCondition(
797 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
798 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
799 }
800
conditionalCompilationEnd()801 void UnwrappedLineParser::conditionalCompilationEnd() {
802 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
803 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
804 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
805 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
806 }
807 }
808 // Guard against #endif's without #if.
809 if (PPBranchLevel > -1)
810 --PPBranchLevel;
811 if (!PPChainBranchIndex.empty())
812 PPChainBranchIndex.pop();
813 if (!PPStack.empty())
814 PPStack.pop_back();
815 }
816
parsePPIf(bool IfDef)817 void UnwrappedLineParser::parsePPIf(bool IfDef) {
818 bool IfNDef = FormatTok->is(tok::pp_ifndef);
819 nextToken();
820 bool Unreachable = false;
821 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
822 Unreachable = true;
823 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
824 Unreachable = true;
825 conditionalCompilationStart(Unreachable);
826 FormatToken *IfCondition = FormatTok;
827 // If there's a #ifndef on the first line, and the only lines before it are
828 // comments, it could be an include guard.
829 bool MaybeIncludeGuard = IfNDef;
830 if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
831 for (auto &Line : Lines) {
832 if (!Line.Tokens.front().Tok->is(tok::comment)) {
833 MaybeIncludeGuard = false;
834 IncludeGuard = IG_Rejected;
835 break;
836 }
837 }
838 --PPBranchLevel;
839 parsePPUnknown();
840 ++PPBranchLevel;
841 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
842 IncludeGuard = IG_IfNdefed;
843 IncludeGuardToken = IfCondition;
844 }
845 }
846
parsePPElse()847 void UnwrappedLineParser::parsePPElse() {
848 // If a potential include guard has an #else, it's not an include guard.
849 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
850 IncludeGuard = IG_Rejected;
851 conditionalCompilationAlternative();
852 if (PPBranchLevel > -1)
853 --PPBranchLevel;
854 parsePPUnknown();
855 ++PPBranchLevel;
856 }
857
parsePPElIf()858 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
859
parsePPEndIf()860 void UnwrappedLineParser::parsePPEndIf() {
861 conditionalCompilationEnd();
862 parsePPUnknown();
863 // If the #endif of a potential include guard is the last thing in the file,
864 // then we found an include guard.
865 unsigned TokenPosition = Tokens->getPosition();
866 FormatToken *PeekNext = AllTokens[TokenPosition];
867 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
868 PeekNext->is(tok::eof) &&
869 Style.IndentPPDirectives != FormatStyle::PPDIS_None)
870 IncludeGuard = IG_Found;
871 }
872
parsePPDefine()873 void UnwrappedLineParser::parsePPDefine() {
874 nextToken();
875
876 if (!FormatTok->Tok.getIdentifierInfo()) {
877 IncludeGuard = IG_Rejected;
878 IncludeGuardToken = nullptr;
879 parsePPUnknown();
880 return;
881 }
882
883 if (IncludeGuard == IG_IfNdefed &&
884 IncludeGuardToken->TokenText == FormatTok->TokenText) {
885 IncludeGuard = IG_Defined;
886 IncludeGuardToken = nullptr;
887 for (auto &Line : Lines) {
888 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
889 IncludeGuard = IG_Rejected;
890 break;
891 }
892 }
893 }
894
895 nextToken();
896 if (FormatTok->Tok.getKind() == tok::l_paren &&
897 FormatTok->WhitespaceRange.getBegin() ==
898 FormatTok->WhitespaceRange.getEnd()) {
899 parseParens();
900 }
901 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
902 Line->Level += PPBranchLevel + 1;
903 addUnwrappedLine();
904 ++Line->Level;
905
906 // Errors during a preprocessor directive can only affect the layout of the
907 // preprocessor directive, and thus we ignore them. An alternative approach
908 // would be to use the same approach we use on the file level (no
909 // re-indentation if there was a structural error) within the macro
910 // definition.
911 parseFile();
912 }
913
parsePPUnknown()914 void UnwrappedLineParser::parsePPUnknown() {
915 do {
916 nextToken();
917 } while (!eof());
918 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
919 Line->Level += PPBranchLevel + 1;
920 addUnwrappedLine();
921 }
922
923 // Here we exclude certain tokens that are not usually the first token in an
924 // unwrapped line. This is used in attempt to distinguish macro calls without
925 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)926 static bool tokenCanStartNewLine(const FormatToken &Tok) {
927 // Semicolon can be a null-statement, l_square can be a start of a macro or
928 // a C++11 attribute, but this doesn't seem to be common.
929 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
930 Tok.isNot(TT_AttributeSquare) &&
931 // Tokens that can only be used as binary operators and a part of
932 // overloaded operator names.
933 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
934 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
935 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
936 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
937 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
938 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
939 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
940 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
941 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
942 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
943 Tok.isNot(tok::lesslessequal) &&
944 // Colon is used in labels, base class lists, initializer lists,
945 // range-based for loops, ternary operator, but should never be the
946 // first token in an unwrapped line.
947 Tok.isNot(tok::colon) &&
948 // 'noexcept' is a trailing annotation.
949 Tok.isNot(tok::kw_noexcept);
950 }
951
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)952 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
953 const FormatToken *FormatTok) {
954 // FIXME: This returns true for C/C++ keywords like 'struct'.
955 return FormatTok->is(tok::identifier) &&
956 (FormatTok->Tok.getIdentifierInfo() == nullptr ||
957 !FormatTok->isOneOf(
958 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
959 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
960 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
961 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
962 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
963 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
964 Keywords.kw_from));
965 }
966
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)967 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
968 const FormatToken *FormatTok) {
969 return FormatTok->Tok.isLiteral() ||
970 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
971 mustBeJSIdent(Keywords, FormatTok);
972 }
973
974 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
975 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)976 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
977 const FormatToken *FormatTok) {
978 return FormatTok->isOneOf(
979 tok::kw_return, Keywords.kw_yield,
980 // conditionals
981 tok::kw_if, tok::kw_else,
982 // loops
983 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
984 // switch/case
985 tok::kw_switch, tok::kw_case,
986 // exceptions
987 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
988 // declaration
989 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
990 Keywords.kw_async, Keywords.kw_function,
991 // import/export
992 Keywords.kw_import, tok::kw_export);
993 }
994
995 // readTokenWithJavaScriptASI reads the next token and terminates the current
996 // line if JavaScript Automatic Semicolon Insertion must
997 // happen between the current token and the next token.
998 //
999 // This method is conservative - it cannot cover all edge cases of JavaScript,
1000 // but only aims to correctly handle certain well known cases. It *must not*
1001 // return true in speculative cases.
readTokenWithJavaScriptASI()1002 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1003 FormatToken *Previous = FormatTok;
1004 readToken();
1005 FormatToken *Next = FormatTok;
1006
1007 bool IsOnSameLine =
1008 CommentsBeforeNextToken.empty()
1009 ? Next->NewlinesBefore == 0
1010 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1011 if (IsOnSameLine)
1012 return;
1013
1014 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1015 bool PreviousStartsTemplateExpr =
1016 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1017 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1018 // If the line contains an '@' sign, the previous token might be an
1019 // annotation, which can precede another identifier/value.
1020 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1021 [](UnwrappedLineNode &LineNode) {
1022 return LineNode.Tok->is(tok::at);
1023 }) != Line->Tokens.end();
1024 if (HasAt)
1025 return;
1026 }
1027 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1028 return addUnwrappedLine();
1029 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1030 bool NextEndsTemplateExpr =
1031 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1032 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1033 (PreviousMustBeValue ||
1034 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1035 tok::minusminus)))
1036 return addUnwrappedLine();
1037 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1038 isJSDeclOrStmt(Keywords, Next))
1039 return addUnwrappedLine();
1040 }
1041
/// Parses one structural element starting at the current token and emits the
/// unwrapped line(s) that belong to it.
///
/// The first switch dispatches on the leading token to dedicated parsers for
/// constructs that are recognized by how they begin (namespaces, access
/// specifiers, control flow statements, extern blocks, import/export, and
/// identifiers typed as special macros). Cases that `break` instead of
/// `return` fall into the token loop that follows, which consumes the rest of
/// the element construct by construct until a case returns or eof() is hit.
///
/// Must not be called while the current token is a '{' (asserted).
parseStructuralElement()1042 void UnwrappedLineParser::parseStructuralElement() {
1043 assert(!FormatTok->is(tok::l_brace));
// TableGen: an include token with an optional string literal is emitted as
// its own line.
1044 if (Style.Language == FormatStyle::LK_TableGen &&
1045 FormatTok->is(tok::pp_include)) {
1046 nextToken();
1047 if (FormatTok->is(tok::string_literal))
1048 nextToken();
1049 addUnwrappedLine();
1050 return;
1051 }
// First pass: dispatch on the token that starts the element.
1052 switch (FormatTok->Tok.getKind()) {
1053 case tok::kw_asm:
// Brace-delimited inline asm: both braces are typed TT_InlineASMBrace and
// every token in between is marked Finalized.
1054 nextToken();
1055 if (FormatTok->is(tok::l_brace)) {
1056 FormatTok->setType(TT_InlineASMBrace);
1057 nextToken();
1058 while (FormatTok && FormatTok->isNot(tok::eof)) {
1059 if (FormatTok->is(tok::r_brace)) {
1060 FormatTok->setType(TT_InlineASMBrace);
1061 nextToken();
1062 addUnwrappedLine();
1063 break;
1064 }
1065 FormatTok->Finalized = true;
1066 nextToken();
1067 }
1068 }
1069 break;
1070 case tok::kw_namespace:
1071 parseNamespace();
1072 return;
1073 case tok::kw_public:
1074 case tok::kw_protected:
1075 case tok::kw_private:
// In Java, JavaScript and C# the keyword is simply consumed; in the other
// languages it is parsed as an access specifier.
1076 if (Style.Language == FormatStyle::LK_Java ||
1077 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1078 nextToken();
1079 else
1080 parseAccessSpecifier();
1081 return;
1082 case tok::kw_if:
1083 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1084 // field/method declaration.
1085 break;
1086 parseIfThenElse();
1087 return;
1088 case tok::kw_for:
1089 case tok::kw_while:
1090 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1091 // field/method declaration.
1092 break;
1093 parseForOrWhileLoop();
1094 return;
1095 case tok::kw_do:
1096 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1097 // field/method declaration.
1098 break;
1099 parseDoWhile();
1100 return;
1101 case tok::kw_switch:
1102 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1103 // 'switch: string' field declaration.
1104 break;
1105 parseSwitch();
1106 return;
1107 case tok::kw_default:
1108 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1109 // 'default: string' field declaration.
1110 break;
1111 nextToken();
1112 if (FormatTok->is(tok::colon)) {
1113 parseLabel();
1114 return;
1115 }
1116 // e.g. "default void f() {}" in a Java interface.
1117 break;
1118 case tok::kw_case:
1119 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1120 // 'case: string' field declaration.
1121 break;
1122 parseCaseLabel();
1123 return;
1124 case tok::kw_try:
1125 case tok::kw___try:
1126 if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1127 // field/method declaration.
1128 break;
1129 parseTryCatch();
1130 return;
1131 case tok::kw_extern:
// 'extern' followed by a string literal and '{' is parsed as a declaration
// block; the added indentation depends on IndentExternBlock and
// BraceWrapping.AfterExternBlock.
1132 nextToken();
1133 if (FormatTok->Tok.is(tok::string_literal)) {
1134 nextToken();
1135 if (FormatTok->Tok.is(tok::l_brace)) {
1136 if (!Style.IndentExternBlock) {
1137 if (Style.BraceWrapping.AfterExternBlock) {
1138 addUnwrappedLine();
1139 }
1140 unsigned AddLevels = Style.BraceWrapping.AfterExternBlock ? 1u : 0u;
1141 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1142 } else {
1143 unsigned AddLevels =
1144 Style.IndentExternBlock == FormatStyle::IEBS_Indent ? 1u : 0u;
1145 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1146 }
1147 addUnwrappedLine();
1148 return;
1149 }
1150 }
1151 break;
1152 case tok::kw_export:
1153 if (Style.Language == FormatStyle::LK_JavaScript) {
1154 parseJavaScriptEs6ImportExport();
1155 return;
1156 }
1157 if (!Style.isCpp())
1158 break;
1159 // Handle C++ "(inline|export) namespace".
1160 LLVM_FALLTHROUGH;
1161 case tok::kw_inline:
1162 nextToken();
1163 if (FormatTok->Tok.is(tok::kw_namespace)) {
1164 parseNamespace();
1165 return;
1166 }
1167 break;
1168 case tok::identifier:
1169 if (FormatTok->is(TT_ForEachMacro)) {
1170 parseForOrWhileLoop();
1171 return;
1172 }
1173 if (FormatTok->is(TT_MacroBlockBegin)) {
1174 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1175 /*MunchSemi=*/false);
1176 return;
1177 }
1178 if (FormatTok->is(Keywords.kw_import)) {
1179 if (Style.Language == FormatStyle::LK_JavaScript) {
1180 parseJavaScriptEs6ImportExport();
1181 return;
1182 }
// Proto import: optional 'public', then a string literal and an optional
// ';'. Without the string literal this returns without emitting a line.
1183 if (Style.Language == FormatStyle::LK_Proto) {
1184 nextToken();
1185 if (FormatTok->is(tok::kw_public))
1186 nextToken();
1187 if (!FormatTok->is(tok::string_literal))
1188 return;
1189 nextToken();
1190 if (FormatTok->is(tok::semi))
1191 nextToken();
1192 addUnwrappedLine();
1193 return;
1194 }
1195 }
// C++ only: one of the signals/slots keywords followed by ':' is emitted as
// its own line.
1196 if (Style.isCpp() &&
1197 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1198 Keywords.kw_slots, Keywords.kw_qslots)) {
1199 nextToken();
1200 if (FormatTok->is(tok::colon)) {
1201 nextToken();
1202 addUnwrappedLine();
1203 return;
1204 }
1205 }
1206 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1207 parseStatementMacro();
1208 return;
1209 }
1210 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1211 parseNamespace();
1212 return;
1213 }
1214 // In all other cases, parse the declaration.
1215 break;
1216 default:
1217 break;
1218 }
// Second pass: consume the remainder of the element, one construct per
// iteration, until a case returns or eof() is reached.
1219 do {
1220 const FormatToken *Previous = FormatTok->Previous;
1221 switch (FormatTok->Tok.getKind()) {
1222 case tok::at:
1223 nextToken();
1224 if (FormatTok->Tok.is(tok::l_brace)) {
1225 nextToken();
1226 parseBracedList();
1227 break;
1228 } else if (Style.Language == FormatStyle::LK_Java &&
1229 FormatTok->is(Keywords.kw_interface)) {
1230 nextToken();
1231 break;
1232 }
// Otherwise dispatch on the Objective-C keyword that follows the '@'.
1233 switch (FormatTok->Tok.getObjCKeywordID()) {
1234 case tok::objc_public:
1235 case tok::objc_protected:
1236 case tok::objc_package:
1237 case tok::objc_private:
1238 return parseAccessSpecifier();
1239 case tok::objc_interface:
1240 case tok::objc_implementation:
1241 return parseObjCInterfaceOrImplementation();
1242 case tok::objc_protocol:
1243 if (parseObjCProtocol())
1244 return;
1245 break;
1246 case tok::objc_end:
1247 return; // Handled by the caller.
1248 case tok::objc_optional:
1249 case tok::objc_required:
1250 nextToken();
1251 addUnwrappedLine();
1252 return;
1253 case tok::objc_autoreleasepool:
1254 nextToken();
1255 if (FormatTok->Tok.is(tok::l_brace)) {
1256 if (Style.BraceWrapping.AfterControlStatement ==
1257 FormatStyle::BWACS_Always)
1258 addUnwrappedLine();
1259 parseBlock(/*MustBeDeclaration=*/false);
1260 }
1261 addUnwrappedLine();
1262 return;
1263 case tok::objc_synchronized:
1264 nextToken();
1265 if (FormatTok->Tok.is(tok::l_paren))
1266 // Skip synchronization object
1267 parseParens();
1268 if (FormatTok->Tok.is(tok::l_brace)) {
1269 if (Style.BraceWrapping.AfterControlStatement ==
1270 FormatStyle::BWACS_Always)
1271 addUnwrappedLine();
1272 parseBlock(/*MustBeDeclaration=*/false);
1273 }
1274 addUnwrappedLine();
1275 return;
1276 case tok::objc_try:
1277 // This branch isn't strictly necessary (the kw_try case below would
1278 // do this too after the tok::at is parsed above). But be explicit.
1279 parseTryCatch();
1280 return;
1281 default:
1282 break;
1283 }
1284 break;
1285 case tok::kw_concept:
1286 parseConcept();
1287 break;
1288 case tok::kw_requires:
1289 parseRequires();
1290 break;
1291 case tok::kw_enum:
1292 // Ignore if this is part of "template <enum ...".
1293 if (Previous && Previous->is(tok::less)) {
1294 nextToken();
1295 break;
1296 }
1297
1298 // parseEnum falls through and does not yet add an unwrapped line as an
1299 // enum definition can start a structural element.
1300 if (!parseEnum())
1301 break;
1302 // This only applies for C++.
1303 if (!Style.isCpp()) {
1304 addUnwrappedLine();
1305 return;
1306 }
1307 break;
1308 case tok::kw_typedef:
// A typedef followed by one of the NS_/CF_ enum macro keywords is handed to
// parseEnum().
1309 nextToken();
1310 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1311 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1312 Keywords.kw_CF_CLOSED_ENUM,
1313 Keywords.kw_NS_CLOSED_ENUM))
1314 parseEnum();
1315 break;
1316 case tok::kw_struct:
1317 case tok::kw_union:
1318 case tok::kw_class:
1319 if (parseStructLike()) {
1320 return;
1321 }
1322 break;
1323 case tok::period:
1324 nextToken();
1325 // In Java, classes have an implicit static member "class".
1326 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1327 FormatTok->is(tok::kw_class))
1328 nextToken();
1329 if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1330 FormatTok->Tok.getIdentifierInfo())
1331 // JavaScript only has pseudo keywords, all keywords are allowed to
1332 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1333 nextToken();
1334 break;
1335 case tok::semi:
// A ';' ends the element: consume it and emit the line.
1336 nextToken();
1337 addUnwrappedLine();
1338 return;
1339 case tok::r_brace:
// The '}' itself is not consumed here; only the pending line is emitted.
1340 addUnwrappedLine();
1341 return;
1342 case tok::l_paren:
1343 parseParens();
1344 break;
1345 case tok::kw_operator:
1346 nextToken();
1347 if (FormatTok->isBinaryOperator())
1348 nextToken();
1349 break;
1350 case tok::caret:
// '^' optionally followed by an identifier or simple type, a parenthesized
// list and a '{' that is parsed as a child block.
1351 nextToken();
1352 if (FormatTok->Tok.isAnyIdentifier() ||
1353 FormatTok->isSimpleTypeSpecifier())
1354 nextToken();
1355 if (FormatTok->is(tok::l_paren))
1356 parseParens();
1357 if (FormatTok->is(tok::l_brace))
1358 parseChildBlock();
1359 break;
1360 case tok::l_brace:
1361 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1362 // A block outside of parentheses must be the last part of a
1363 // structural element.
1364 // FIXME: Figure out cases where this is not true, and add projections
1365 // for them (the one we know is missing are lambdas).
1366 if (Style.BraceWrapping.AfterFunction)
1367 addUnwrappedLine();
1368 FormatTok->setType(TT_FunctionLBrace);
1369 parseBlock(/*MustBeDeclaration=*/false);
1370 addUnwrappedLine();
1371 return;
1372 }
1373 // Otherwise this was a braced init list, and the structural
1374 // element continues.
1375 break;
1376 case tok::kw_try:
1377 if (Style.Language == FormatStyle::LK_JavaScript &&
1378 Line->MustBeDeclaration) {
1379 // field/method declaration.
1380 nextToken();
1381 break;
1382 }
1383 // We arrive here when parsing function-try blocks.
1384 if (Style.BraceWrapping.AfterFunction)
1385 addUnwrappedLine();
1386 parseTryCatch();
1387 return;
1388 case tok::identifier: {
1389 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1390 Line->MustBeDeclaration) {
1391 addUnwrappedLine();
1392 parseCSharpGenericTypeConstraint();
1393 break;
1394 }
1395 if (FormatTok->is(TT_MacroBlockEnd)) {
1396 addUnwrappedLine();
1397 return;
1398 }
1399
1400 // Function declarations (as opposed to function expressions) are parsed
1401 // on their own unwrapped line by continuing this loop. Function
1402 // expressions (functions that are not on their own line) must not create
1403 // a new unwrapped line, so they are special cased below.
1404 size_t TokenCount = Line->Tokens.size();
1405 if (Style.Language == FormatStyle::LK_JavaScript &&
1406 FormatTok->is(Keywords.kw_function) &&
1407 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1408 Keywords.kw_async)))) {
1409 tryToParseJSFunction();
1410 break;
1411 }
1412 if ((Style.Language == FormatStyle::LK_JavaScript ||
1413 Style.Language == FormatStyle::LK_Java) &&
1414 FormatTok->is(Keywords.kw_interface)) {
1415 if (Style.Language == FormatStyle::LK_JavaScript) {
1416 // In JavaScript/TypeScript, "interface" can be used as a standalone
1417 // identifier, e.g. in `var interface = 1;`. If "interface" is
1418 // followed by another identifier, it is very likely to be an actual
1419 // interface declaration.
// Peek at the next token, then restore the stream position.
1420 unsigned StoredPosition = Tokens->getPosition();
1421 FormatToken *Next = Tokens->getNextToken();
1422 FormatTok = Tokens->setPosition(StoredPosition);
1423 if (Next && !mustBeJSIdent(Keywords, Next)) {
1424 nextToken();
1425 break;
1426 }
1427 }
1428 parseRecord();
1429 addUnwrappedLine();
1430 return;
1431 }
1432
1433 if (FormatTok->is(Keywords.kw_interface)) {
1434 if (parseStructLike()) {
1435 return;
1436 }
1437 break;
1438 }
1439
1440 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1441 parseStatementMacro();
1442 return;
1443 }
1444
1445 // See if the following token should start a new unwrapped line.
1446 StringRef Text = FormatTok->TokenText;
1447 nextToken();
1448
1449 // JS doesn't have macros, and within classes colons indicate fields, not
1450 // labels.
1451 if (Style.Language == FormatStyle::LK_JavaScript)
1452 break;
1453
// Only consider labels and macros when the identifier is the first token of
// the line, optionally preceded by a single comment.
1454 TokenCount = Line->Tokens.size();
1455 if (TokenCount == 1 ||
1456 (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1457 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1458 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1459 parseLabel(!Style.IndentGotoLabels);
1460 return;
1461 }
1462 // Recognize function-like macro usages without trailing semicolon as
1463 // well as free-standing macros like Q_OBJECT.
1464 bool FunctionLike = FormatTok->is(tok::l_paren);
1465 if (FunctionLike)
1466 parseParens();
1467
1468 bool FollowedByNewline =
1469 CommentsBeforeNextToken.empty()
1470 ? FormatTok->NewlinesBefore > 0
1471 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1472
// Macro heuristic: the name is all upper case and either at least five
// characters long or followed by parentheses, and the next token starts on a
// new line and can begin one.
1473 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1474 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1475 addUnwrappedLine();
1476 return;
1477 }
1478 }
1479 break;
1480 }
1481 case tok::equal:
1482 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1483 // TT_FatArrow. They always start an expression or a child block if
1484 // followed by a curly.
1485 if (FormatTok->is(TT_FatArrow)) {
1486 nextToken();
1487 if (FormatTok->is(tok::l_brace)) {
1488 // C# may break after => if the next character is a newline.
1489 if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1490 // calling `addUnwrappedLine()` here causes odd parsing errors.
1491 FormatTok->MustBreakBefore = true;
1492 }
1493 parseChildBlock();
1494 }
1495 break;
1496 }
1497
1498 nextToken();
1499 if (FormatTok->Tok.is(tok::l_brace)) {
1500 // Block kind should probably be set to BK_BracedInit for any language.
1501 // C# needs this change to ensure that array initialisers and object
1502 // initialisers are indented the same way.
1503 if (Style.isCSharp())
1504 FormatTok->setBlockKind(BK_BracedInit);
1505 nextToken();
1506 parseBracedList();
1507 } else if (Style.Language == FormatStyle::LK_Proto &&
1508 FormatTok->Tok.is(tok::less)) {
1509 nextToken();
1510 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1511 /*ClosingBraceKind=*/tok::greater);
1512 }
1513 break;
1514 case tok::l_square:
1515 parseSquare();
1516 break;
1517 case tok::kw_new:
1518 parseNew();
1519 break;
1520 default:
1521 nextToken();
1522 break;
1523 }
1524 } while (!eof());
1525 }
1526
tryToParsePropertyAccessor()1527 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1528 assert(FormatTok->is(tok::l_brace));
1529 if (!Style.isCSharp())
1530 return false;
1531 // See if it's a property accessor.
1532 if (FormatTok->Previous->isNot(tok::identifier))
1533 return false;
1534
1535 // See if we are inside a property accessor.
1536 //
1537 // Record the current tokenPosition so that we can advance and
1538 // reset the current token. `Next` is not set yet so we need
1539 // another way to advance along the token stream.
1540 unsigned int StoredPosition = Tokens->getPosition();
1541 FormatToken *Tok = Tokens->getNextToken();
1542
1543 // A trivial property accessor is of the form:
1544 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1545 // Track these as they do not require line breaks to be introduced.
1546 bool HasGetOrSet = false;
1547 bool IsTrivialPropertyAccessor = true;
1548 while (!eof()) {
1549 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1550 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1551 Keywords.kw_set)) {
1552 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1553 HasGetOrSet = true;
1554 Tok = Tokens->getNextToken();
1555 continue;
1556 }
1557 if (Tok->isNot(tok::r_brace))
1558 IsTrivialPropertyAccessor = false;
1559 break;
1560 }
1561
1562 if (!HasGetOrSet) {
1563 Tokens->setPosition(StoredPosition);
1564 return false;
1565 }
1566
1567 // Try to parse the property accessor:
1568 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1569 Tokens->setPosition(StoredPosition);
1570 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1571 addUnwrappedLine();
1572 nextToken();
1573 do {
1574 switch (FormatTok->Tok.getKind()) {
1575 case tok::r_brace:
1576 nextToken();
1577 if (FormatTok->is(tok::equal)) {
1578 while (!eof() && FormatTok->isNot(tok::semi))
1579 nextToken();
1580 nextToken();
1581 }
1582 addUnwrappedLine();
1583 return true;
1584 case tok::l_brace:
1585 ++Line->Level;
1586 parseBlock(/*MustBeDeclaration=*/true);
1587 addUnwrappedLine();
1588 --Line->Level;
1589 break;
1590 case tok::equal:
1591 if (FormatTok->is(TT_FatArrow)) {
1592 ++Line->Level;
1593 do {
1594 nextToken();
1595 } while (!eof() && FormatTok->isNot(tok::semi));
1596 nextToken();
1597 addUnwrappedLine();
1598 --Line->Level;
1599 break;
1600 }
1601 nextToken();
1602 break;
1603 default:
1604 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1605 !IsTrivialPropertyAccessor) {
1606 // Non-trivial get/set needs to be on its own line.
1607 addUnwrappedLine();
1608 }
1609 nextToken();
1610 }
1611 } while (!eof());
1612
1613 // Unreachable for well-formed code (paired '{' and '}').
1614 return true;
1615 }
1616
tryToParseLambda()1617 bool UnwrappedLineParser::tryToParseLambda() {
1618 if (!Style.isCpp()) {
1619 nextToken();
1620 return false;
1621 }
1622 assert(FormatTok->is(tok::l_square));
1623 FormatToken &LSquare = *FormatTok;
1624 if (!tryToParseLambdaIntroducer())
1625 return false;
1626
1627 bool SeenArrow = false;
1628
1629 while (FormatTok->isNot(tok::l_brace)) {
1630 if (FormatTok->isSimpleTypeSpecifier()) {
1631 nextToken();
1632 continue;
1633 }
1634 switch (FormatTok->Tok.getKind()) {
1635 case tok::l_brace:
1636 break;
1637 case tok::l_paren:
1638 parseParens();
1639 break;
1640 case tok::amp:
1641 case tok::star:
1642 case tok::kw_const:
1643 case tok::comma:
1644 case tok::less:
1645 case tok::greater:
1646 case tok::identifier:
1647 case tok::numeric_constant:
1648 case tok::coloncolon:
1649 case tok::kw_class:
1650 case tok::kw_mutable:
1651 case tok::kw_noexcept:
1652 case tok::kw_template:
1653 case tok::kw_typename:
1654 nextToken();
1655 break;
1656 // Specialization of a template with an integer parameter can contain
1657 // arithmetic, logical, comparison and ternary operators.
1658 //
1659 // FIXME: This also accepts sequences of operators that are not in the scope
1660 // of a template argument list.
1661 //
1662 // In a C++ lambda a template type can only occur after an arrow. We use
1663 // this as an heuristic to distinguish between Objective-C expressions
1664 // followed by an `a->b` expression, such as:
1665 // ([obj func:arg] + a->b)
1666 // Otherwise the code below would parse as a lambda.
1667 //
1668 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1669 // explicit template lists: []<bool b = true && false>(U &&u){}
1670 case tok::plus:
1671 case tok::minus:
1672 case tok::exclaim:
1673 case tok::tilde:
1674 case tok::slash:
1675 case tok::percent:
1676 case tok::lessless:
1677 case tok::pipe:
1678 case tok::pipepipe:
1679 case tok::ampamp:
1680 case tok::caret:
1681 case tok::equalequal:
1682 case tok::exclaimequal:
1683 case tok::greaterequal:
1684 case tok::lessequal:
1685 case tok::question:
1686 case tok::colon:
1687 case tok::ellipsis:
1688 case tok::kw_true:
1689 case tok::kw_false:
1690 if (SeenArrow) {
1691 nextToken();
1692 break;
1693 }
1694 return true;
1695 case tok::arrow:
1696 // This might or might not actually be a lambda arrow (this could be an
1697 // ObjC method invocation followed by a dereferencing arrow). We might
1698 // reset this back to TT_Unknown in TokenAnnotator.
1699 FormatTok->setType(TT_LambdaArrow);
1700 SeenArrow = true;
1701 nextToken();
1702 break;
1703 default:
1704 return true;
1705 }
1706 }
1707 FormatTok->setType(TT_LambdaLBrace);
1708 LSquare.setType(TT_LambdaLSquare);
1709 parseChildBlock();
1710 return true;
1711 }
1712
tryToParseLambdaIntroducer()1713 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1714 const FormatToken *Previous = FormatTok->Previous;
1715 if (Previous &&
1716 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1717 tok::kw_delete, tok::l_square) ||
1718 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1719 Previous->isSimpleTypeSpecifier())) {
1720 nextToken();
1721 return false;
1722 }
1723 nextToken();
1724 if (FormatTok->is(tok::l_square)) {
1725 return false;
1726 }
1727 parseSquare(/*LambdaIntroducer=*/true);
1728 return true;
1729 }
1730
tryToParseJSFunction()1731 void UnwrappedLineParser::tryToParseJSFunction() {
1732 assert(FormatTok->is(Keywords.kw_function) ||
1733 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1734 if (FormatTok->is(Keywords.kw_async))
1735 nextToken();
1736 // Consume "function".
1737 nextToken();
1738
1739 // Consume * (generator function). Treat it like C++'s overloaded operators.
1740 if (FormatTok->is(tok::star)) {
1741 FormatTok->setType(TT_OverloadedOperator);
1742 nextToken();
1743 }
1744
1745 // Consume function name.
1746 if (FormatTok->is(tok::identifier))
1747 nextToken();
1748
1749 if (FormatTok->isNot(tok::l_paren))
1750 return;
1751
1752 // Parse formal parameter list.
1753 parseParens();
1754
1755 if (FormatTok->is(tok::colon)) {
1756 // Parse a type definition.
1757 nextToken();
1758
1759 // Eat the type declaration. For braced inline object types, balance braces,
1760 // otherwise just parse until finding an l_brace for the function body.
1761 if (FormatTok->is(tok::l_brace))
1762 tryToParseBracedList();
1763 else
1764 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1765 nextToken();
1766 }
1767
1768 if (FormatTok->is(tok::semi))
1769 return;
1770
1771 parseChildBlock();
1772 }
1773
tryToParseBracedList()1774 bool UnwrappedLineParser::tryToParseBracedList() {
1775 if (FormatTok->is(BK_Unknown))
1776 calculateBraceTypes();
1777 assert(FormatTok->isNot(BK_Unknown));
1778 if (FormatTok->is(BK_Block))
1779 return false;
1780 nextToken();
1781 parseBracedList();
1782 return true;
1783 }
1784
parseBracedList(bool ContinueOnSemicolons,bool IsEnum,tok::TokenKind ClosingBraceKind)1785 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1786 bool IsEnum,
1787 tok::TokenKind ClosingBraceKind) {
1788 bool HasError = false;
1789
1790 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1791 // replace this by using parseAssigmentExpression() inside.
1792 do {
1793 if (Style.isCSharp()) {
1794 if (FormatTok->is(TT_FatArrow)) {
1795 nextToken();
1796 // Fat arrows can be followed by simple expressions or by child blocks
1797 // in curly braces.
1798 if (FormatTok->is(tok::l_brace)) {
1799 parseChildBlock();
1800 continue;
1801 }
1802 }
1803 }
1804 if (Style.Language == FormatStyle::LK_JavaScript) {
1805 if (FormatTok->is(Keywords.kw_function) ||
1806 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1807 tryToParseJSFunction();
1808 continue;
1809 }
1810 if (FormatTok->is(TT_FatArrow)) {
1811 nextToken();
1812 // Fat arrows can be followed by simple expressions or by child blocks
1813 // in curly braces.
1814 if (FormatTok->is(tok::l_brace)) {
1815 parseChildBlock();
1816 continue;
1817 }
1818 }
1819 if (FormatTok->is(tok::l_brace)) {
1820 // Could be a method inside of a braced list `{a() { return 1; }}`.
1821 if (tryToParseBracedList())
1822 continue;
1823 parseChildBlock();
1824 }
1825 }
1826 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1827 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1828 addUnwrappedLine();
1829 nextToken();
1830 return !HasError;
1831 }
1832 switch (FormatTok->Tok.getKind()) {
1833 case tok::caret:
1834 nextToken();
1835 if (FormatTok->is(tok::l_brace)) {
1836 parseChildBlock();
1837 }
1838 break;
1839 case tok::l_square:
1840 if (Style.isCSharp())
1841 parseSquare();
1842 else
1843 tryToParseLambda();
1844 break;
1845 case tok::l_paren:
1846 parseParens();
1847 // JavaScript can just have free standing methods and getters/setters in
1848 // object literals. Detect them by a "{" following ")".
1849 if (Style.Language == FormatStyle::LK_JavaScript) {
1850 if (FormatTok->is(tok::l_brace))
1851 parseChildBlock();
1852 break;
1853 }
1854 break;
1855 case tok::l_brace:
1856 // Assume there are no blocks inside a braced init list apart
1857 // from the ones we explicitly parse out (like lambdas).
1858 FormatTok->setBlockKind(BK_BracedInit);
1859 nextToken();
1860 parseBracedList();
1861 break;
1862 case tok::less:
1863 if (Style.Language == FormatStyle::LK_Proto) {
1864 nextToken();
1865 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1866 /*ClosingBraceKind=*/tok::greater);
1867 } else {
1868 nextToken();
1869 }
1870 break;
1871 case tok::semi:
1872 // JavaScript (or more precisely TypeScript) can have semicolons in braced
1873 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1874 // used for error recovery if we have otherwise determined that this is
1875 // a braced list.
1876 if (Style.Language == FormatStyle::LK_JavaScript) {
1877 nextToken();
1878 break;
1879 }
1880 HasError = true;
1881 if (!ContinueOnSemicolons)
1882 return !HasError;
1883 nextToken();
1884 break;
1885 case tok::comma:
1886 nextToken();
1887 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1888 addUnwrappedLine();
1889 break;
1890 default:
1891 nextToken();
1892 break;
1893 }
1894 } while (!eof());
1895 return false;
1896 }
1897
parseParens()1898 void UnwrappedLineParser::parseParens() {
1899 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1900 nextToken();
1901 do {
1902 switch (FormatTok->Tok.getKind()) {
1903 case tok::l_paren:
1904 parseParens();
1905 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1906 parseChildBlock();
1907 break;
1908 case tok::r_paren:
1909 nextToken();
1910 return;
1911 case tok::r_brace:
1912 // A "}" inside parenthesis is an error if there wasn't a matching "{".
1913 return;
1914 case tok::l_square:
1915 tryToParseLambda();
1916 break;
1917 case tok::l_brace:
1918 if (!tryToParseBracedList())
1919 parseChildBlock();
1920 break;
1921 case tok::at:
1922 nextToken();
1923 if (FormatTok->Tok.is(tok::l_brace)) {
1924 nextToken();
1925 parseBracedList();
1926 }
1927 break;
1928 case tok::kw_class:
1929 if (Style.Language == FormatStyle::LK_JavaScript)
1930 parseRecord(/*ParseAsExpr=*/true);
1931 else
1932 nextToken();
1933 break;
1934 case tok::identifier:
1935 if (Style.Language == FormatStyle::LK_JavaScript &&
1936 (FormatTok->is(Keywords.kw_function) ||
1937 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1938 tryToParseJSFunction();
1939 else
1940 nextToken();
1941 break;
1942 default:
1943 nextToken();
1944 break;
1945 }
1946 } while (!eof());
1947 }
1948
parseSquare(bool LambdaIntroducer)1949 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1950 if (!LambdaIntroducer) {
1951 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1952 if (tryToParseLambda())
1953 return;
1954 }
1955 do {
1956 switch (FormatTok->Tok.getKind()) {
1957 case tok::l_paren:
1958 parseParens();
1959 break;
1960 case tok::r_square:
1961 nextToken();
1962 return;
1963 case tok::r_brace:
1964 // A "}" inside parenthesis is an error if there wasn't a matching "{".
1965 return;
1966 case tok::l_square:
1967 parseSquare();
1968 break;
1969 case tok::l_brace: {
1970 if (!tryToParseBracedList())
1971 parseChildBlock();
1972 break;
1973 }
1974 case tok::at:
1975 nextToken();
1976 if (FormatTok->Tok.is(tok::l_brace)) {
1977 nextToken();
1978 parseBracedList();
1979 }
1980 break;
1981 default:
1982 nextToken();
1983 break;
1984 }
1985 } while (!eof());
1986 }
1987
parseIfThenElse()1988 void UnwrappedLineParser::parseIfThenElse() {
1989 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1990 nextToken();
1991 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1992 nextToken();
1993 if (FormatTok->Tok.is(tok::l_paren))
1994 parseParens();
1995 // handle [[likely]] / [[unlikely]]
1996 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
1997 parseSquare();
1998 bool NeedsUnwrappedLine = false;
1999 if (FormatTok->Tok.is(tok::l_brace)) {
2000 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2001 parseBlock(/*MustBeDeclaration=*/false);
2002 if (Style.BraceWrapping.BeforeElse)
2003 addUnwrappedLine();
2004 else
2005 NeedsUnwrappedLine = true;
2006 } else {
2007 addUnwrappedLine();
2008 ++Line->Level;
2009 parseStructuralElement();
2010 --Line->Level;
2011 }
2012 if (FormatTok->Tok.is(tok::kw_else)) {
2013 nextToken();
2014 // handle [[likely]] / [[unlikely]]
2015 if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2016 parseSquare();
2017 if (FormatTok->Tok.is(tok::l_brace)) {
2018 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2019 parseBlock(/*MustBeDeclaration=*/false);
2020 addUnwrappedLine();
2021 } else if (FormatTok->Tok.is(tok::kw_if)) {
2022 parseIfThenElse();
2023 } else {
2024 addUnwrappedLine();
2025 ++Line->Level;
2026 parseStructuralElement();
2027 if (FormatTok->is(tok::eof))
2028 addUnwrappedLine();
2029 --Line->Level;
2030 }
2031 } else if (NeedsUnwrappedLine) {
2032 addUnwrappedLine();
2033 }
2034 }
2035
parseTryCatch()2036 void UnwrappedLineParser::parseTryCatch() {
2037 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2038 nextToken();
2039 bool NeedsUnwrappedLine = false;
2040 if (FormatTok->is(tok::colon)) {
2041 // We are in a function try block, what comes is an initializer list.
2042 nextToken();
2043
2044 // In case identifiers were removed by clang-tidy, what might follow is
2045 // multiple commas in sequence - before the first identifier.
2046 while (FormatTok->is(tok::comma))
2047 nextToken();
2048
2049 while (FormatTok->is(tok::identifier)) {
2050 nextToken();
2051 if (FormatTok->is(tok::l_paren))
2052 parseParens();
2053 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2054 FormatTok->is(tok::l_brace)) {
2055 do {
2056 nextToken();
2057 } while (!FormatTok->is(tok::r_brace));
2058 nextToken();
2059 }
2060
2061 // In case identifiers were removed by clang-tidy, what might follow is
2062 // multiple commas in sequence - after the first identifier.
2063 while (FormatTok->is(tok::comma))
2064 nextToken();
2065 }
2066 }
2067 // Parse try with resource.
2068 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2069 parseParens();
2070 }
2071 if (FormatTok->is(tok::l_brace)) {
2072 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2073 parseBlock(/*MustBeDeclaration=*/false);
2074 if (Style.BraceWrapping.BeforeCatch) {
2075 addUnwrappedLine();
2076 } else {
2077 NeedsUnwrappedLine = true;
2078 }
2079 } else if (!FormatTok->is(tok::kw_catch)) {
2080 // The C++ standard requires a compound-statement after a try.
2081 // If there's none, we try to assume there's a structuralElement
2082 // and try to continue.
2083 addUnwrappedLine();
2084 ++Line->Level;
2085 parseStructuralElement();
2086 --Line->Level;
2087 }
2088 while (1) {
2089 if (FormatTok->is(tok::at))
2090 nextToken();
2091 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2092 tok::kw___finally) ||
2093 ((Style.Language == FormatStyle::LK_Java ||
2094 Style.Language == FormatStyle::LK_JavaScript) &&
2095 FormatTok->is(Keywords.kw_finally)) ||
2096 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2097 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2098 break;
2099 nextToken();
2100 while (FormatTok->isNot(tok::l_brace)) {
2101 if (FormatTok->is(tok::l_paren)) {
2102 parseParens();
2103 continue;
2104 }
2105 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2106 return;
2107 nextToken();
2108 }
2109 NeedsUnwrappedLine = false;
2110 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2111 parseBlock(/*MustBeDeclaration=*/false);
2112 if (Style.BraceWrapping.BeforeCatch)
2113 addUnwrappedLine();
2114 else
2115 NeedsUnwrappedLine = true;
2116 }
2117 if (NeedsUnwrappedLine)
2118 addUnwrappedLine();
2119 }
2120
parseNamespace()2121 void UnwrappedLineParser::parseNamespace() {
2122 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2123 "'namespace' expected");
2124
2125 const FormatToken &InitialToken = *FormatTok;
2126 nextToken();
2127 if (InitialToken.is(TT_NamespaceMacro)) {
2128 parseParens();
2129 } else {
2130 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2131 tok::l_square)) {
2132 if (FormatTok->is(tok::l_square))
2133 parseSquare();
2134 else
2135 nextToken();
2136 }
2137 }
2138 if (FormatTok->Tok.is(tok::l_brace)) {
2139 if (ShouldBreakBeforeBrace(Style, InitialToken))
2140 addUnwrappedLine();
2141
2142 unsigned AddLevels =
2143 Style.NamespaceIndentation == FormatStyle::NI_All ||
2144 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2145 DeclarationScopeStack.size() > 1)
2146 ? 1u
2147 : 0u;
2148 bool ManageWhitesmithsBraces =
2149 AddLevels == 0u &&
2150 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2151
2152 // If we're in Whitesmiths mode, indent the brace if we're not indenting
2153 // the whole block.
2154 if (ManageWhitesmithsBraces)
2155 ++Line->Level;
2156
2157 parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2158 /*MunchSemi=*/true,
2159 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2160
2161 // Munch the semicolon after a namespace. This is more common than one would
2162 // think. Putting the semicolon into its own line is very ugly.
2163 if (FormatTok->Tok.is(tok::semi))
2164 nextToken();
2165
2166 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2167
2168 if (ManageWhitesmithsBraces)
2169 --Line->Level;
2170 }
2171 // FIXME: Add error handling.
2172 }
2173
parseNew()2174 void UnwrappedLineParser::parseNew() {
2175 assert(FormatTok->is(tok::kw_new) && "'new' expected");
2176 nextToken();
2177
2178 if (Style.isCSharp()) {
2179 do {
2180 if (FormatTok->is(tok::l_brace))
2181 parseBracedList();
2182
2183 if (FormatTok->isOneOf(tok::semi, tok::comma))
2184 return;
2185
2186 nextToken();
2187 } while (!eof());
2188 }
2189
2190 if (Style.Language != FormatStyle::LK_Java)
2191 return;
2192
2193 // In Java, we can parse everything up to the parens, which aren't optional.
2194 do {
2195 // There should not be a ;, { or } before the new's open paren.
2196 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2197 return;
2198
2199 // Consume the parens.
2200 if (FormatTok->is(tok::l_paren)) {
2201 parseParens();
2202
2203 // If there is a class body of an anonymous class, consume that as child.
2204 if (FormatTok->is(tok::l_brace))
2205 parseChildBlock();
2206 return;
2207 }
2208 nextToken();
2209 } while (!eof());
2210 }
2211
parseForOrWhileLoop()2212 void UnwrappedLineParser::parseForOrWhileLoop() {
2213 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2214 "'for', 'while' or foreach macro expected");
2215 nextToken();
2216 // JS' for await ( ...
2217 if (Style.Language == FormatStyle::LK_JavaScript &&
2218 FormatTok->is(Keywords.kw_await))
2219 nextToken();
2220 if (FormatTok->Tok.is(tok::l_paren))
2221 parseParens();
2222 if (FormatTok->Tok.is(tok::l_brace)) {
2223 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2224 parseBlock(/*MustBeDeclaration=*/false);
2225 addUnwrappedLine();
2226 } else {
2227 addUnwrappedLine();
2228 ++Line->Level;
2229 parseStructuralElement();
2230 --Line->Level;
2231 }
2232 }
2233
parseDoWhile()2234 void UnwrappedLineParser::parseDoWhile() {
2235 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2236 nextToken();
2237 if (FormatTok->Tok.is(tok::l_brace)) {
2238 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2239 parseBlock(/*MustBeDeclaration=*/false);
2240 if (Style.BraceWrapping.BeforeWhile)
2241 addUnwrappedLine();
2242 } else {
2243 addUnwrappedLine();
2244 ++Line->Level;
2245 parseStructuralElement();
2246 --Line->Level;
2247 }
2248
2249 // FIXME: Add error handling.
2250 if (!FormatTok->Tok.is(tok::kw_while)) {
2251 addUnwrappedLine();
2252 return;
2253 }
2254
2255 // If in Whitesmiths mode, the line with the while() needs to be indented
2256 // to the same level as the block.
2257 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2258 ++Line->Level;
2259
2260 nextToken();
2261 parseStructuralElement();
2262 }
2263
// Finishes a label line. Consumes the current token, emits the label (and a
// directly following braced block, if any) at a reduced indent level, then
// restores the level and parses the statement that follows.
//
// LeftAlignLabel: when true the label line is placed at level 0.
void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
  // NOTE(review): presumably the token consumed here is the label's ':' (see
  // parseCaseLabel, which stops on the colon before calling) - confirm for
  // other callers.
  nextToken();
  unsigned OldLineLevel = Line->Level;
  // Outdent the label by one level where possible; inside a preprocessor
  // directive this is only done when the level is above 1.
  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
    --Line->Level;
  if (LeftAlignLabel)
    Line->Level = 0;

  // A "{" directly after the label (with no comments pending before it) is
  // parsed together with the label unless case blocks are to be indented.
  if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
      FormatTok->Tok.is(tok::l_brace)) {

    CompoundStatementIndenter Indenter(this, Line->Level,
                                       Style.BraceWrapping.AfterCaseLabel,
                                       Style.BraceWrapping.IndentBraces);
    parseBlock(/*MustBeDeclaration=*/false);
    // A "break" right after the block is parsed here as well; it gets its
    // own line only when AfterControlStatement is BWACS_Always.
    if (FormatTok->Tok.is(tok::kw_break)) {
      if (Style.BraceWrapping.AfterControlStatement ==
          FormatStyle::BWACS_Always) {
        addUnwrappedLine();
        if (!Style.IndentCaseBlocks &&
            Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
          Line->Level++;
        }
      }
      parseStructuralElement();
    }
    addUnwrappedLine();
  } else {
    // Plain label: swallow a directly following ';' and end the line.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  // Restore the indent level that was active before the label.
  Line->Level = OldLineLevel;
  if (FormatTok->isNot(tok::l_brace)) {
    parseStructuralElement();
    addUnwrappedLine();
  }
}
2302
parseCaseLabel()2303 void UnwrappedLineParser::parseCaseLabel() {
2304 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2305
2306 // FIXME: fix handling of complex expressions here.
2307 do {
2308 nextToken();
2309 } while (!eof() && !FormatTok->Tok.is(tok::colon));
2310 parseLabel();
2311 }
2312
parseSwitch()2313 void UnwrappedLineParser::parseSwitch() {
2314 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2315 nextToken();
2316 if (FormatTok->Tok.is(tok::l_paren))
2317 parseParens();
2318 if (FormatTok->Tok.is(tok::l_brace)) {
2319 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2320 parseBlock(/*MustBeDeclaration=*/false);
2321 addUnwrappedLine();
2322 } else {
2323 addUnwrappedLine();
2324 ++Line->Level;
2325 parseStructuralElement();
2326 --Line->Level;
2327 }
2328 }
2329
parseAccessSpecifier()2330 void UnwrappedLineParser::parseAccessSpecifier() {
2331 nextToken();
2332 // Understand Qt's slots.
2333 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2334 nextToken();
2335 // Otherwise, we don't know what it is, and we'd better keep the next token.
2336 if (FormatTok->Tok.is(tok::colon))
2337 nextToken();
2338 addUnwrappedLine();
2339 }
2340
parseConcept()2341 void UnwrappedLineParser::parseConcept() {
2342 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2343 nextToken();
2344 if (!FormatTok->Tok.is(tok::identifier))
2345 return;
2346 nextToken();
2347 if (!FormatTok->Tok.is(tok::equal))
2348 return;
2349 nextToken();
2350 if (FormatTok->Tok.is(tok::kw_requires)) {
2351 nextToken();
2352 parseRequiresExpression(Line->Level);
2353 } else {
2354 parseConstraintExpression(Line->Level);
2355 }
2356 }
2357
parseRequiresExpression(unsigned int OriginalLevel)2358 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2359 // requires (R range)
2360 if (FormatTok->Tok.is(tok::l_paren)) {
2361 parseParens();
2362 if (Style.IndentRequires && OriginalLevel != Line->Level) {
2363 addUnwrappedLine();
2364 --Line->Level;
2365 }
2366 }
2367
2368 if (FormatTok->Tok.is(tok::l_brace)) {
2369 if (Style.BraceWrapping.AfterFunction)
2370 addUnwrappedLine();
2371 FormatTok->setType(TT_FunctionLBrace);
2372 parseBlock(/*MustBeDeclaration=*/false);
2373 addUnwrappedLine();
2374 } else {
2375 parseConstraintExpression(OriginalLevel);
2376 }
2377 }
2378
// Parses a constraint expression such as "Id<T> && Id<T> || Id<T>".
//
// OriginalLevel is compared with Line->Level when the expression ends: if
// they differ and Style.IndentRequires is set, Line->Level is decremented.
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  // Each iteration handles one operand and the "&&"/"||" that follows it.
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume the remainder of the operand's name, including "<...>" groups.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        // An angle-bracket group is parsed as a braced list whose closing
        // token is ">".
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    // A nested "requires" is handed to parseRequiresExpression().
    if (FormatTok->Tok.is(tok::kw_requires)) {
      parseRequiresExpression(OriginalLevel);
    }
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    // A "{" here is typed as a function brace and parsed as a block.
    if (FormatTok->Tok.is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock(/*MustBeDeclaration=*/false);
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    // A ":" ends the expression immediately, with no level adjustment.
    if (FormatTok->Tok.is(tok::colon)) {
      return;
    }
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // No further "&&"/"||": the expression is complete. End the line
      // unless the previous token is an identifier, "requires" or "::".
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon)) {
        addUnwrappedLine();
      }
      if (Style.IndentRequires && OriginalLevel != Line->Level) {
        --Line->Level;
      }
      break;
    } else {
      // Mark the "&&"/"||" joining two constraints.
      FormatTok->setType(TT_ConstraintJunctions);
    }

    nextToken();
  }
}
2436
parseRequires()2437 void UnwrappedLineParser::parseRequires() {
2438 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2439
2440 unsigned OriginalLevel = Line->Level;
2441 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2442 addUnwrappedLine();
2443 if (Style.IndentRequires) {
2444 Line->Level++;
2445 }
2446 }
2447 nextToken();
2448
2449 parseRequiresExpression(OriginalLevel);
2450 }
2451
// Parses an enum declaration or definition.
//
// Returns false when the tokens turn out not to be an enum (see the early
// returns below); returns true once the construct has been handled as an
// enum, whether or not a body was present.
bool UnwrappedLineParser::parseEnum() {
  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->Tok.is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.Language == FormatStyle::LK_JavaScript &&
      FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
    nextToken();

  // Consume the enum's name and anything else up to its body.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  // When short enums may not stay on one line, the "{" gets a line of its
  // own and the enumerators are indented by one level.
  if (!Style.AllowShortEnumsOnASingleLine)
    addUnwrappedLine();
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  // If the braced list could not be parsed cleanly, end the line here,
  // swallowing a directly following ';'.
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
  return true;
}
2526
parseStructLike()2527 bool UnwrappedLineParser::parseStructLike() {
2528 // parseRecord falls through and does not yet add an unwrapped line as a
2529 // record declaration or definition can start a structural element.
2530 parseRecord();
2531 // This does not apply to Java, JavaScript and C#.
2532 if (Style.Language == FormatStyle::LK_Java ||
2533 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2534 if (FormatTok->is(tok::semi))
2535 nextToken();
2536 addUnwrappedLine();
2537 return true;
2538 }
2539 return false;
2540 }
2541
2542 namespace {
2543 // A class used to set and restore the Token position when peeking
2544 // ahead in the token source.
2545 class ScopedTokenPosition {
2546 unsigned StoredPosition;
2547 FormatTokenSource *Tokens;
2548
2549 public:
ScopedTokenPosition(FormatTokenSource * Tokens)2550 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2551 assert(Tokens && "Tokens expected to not be null");
2552 StoredPosition = Tokens->getPosition();
2553 }
2554
~ScopedTokenPosition()2555 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2556 };
2557 } // namespace
2558
2559 // Look to see if we have [[ by looking ahead, if
2560 // its not then rewind to the original position.
tryToParseSimpleAttribute()2561 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2562 ScopedTokenPosition AutoPosition(Tokens);
2563 FormatToken *Tok = Tokens->getNextToken();
2564 // We already read the first [ check for the second.
2565 if (Tok && !Tok->is(tok::l_square)) {
2566 return false;
2567 }
2568 // Double check that the attribute is just something
2569 // fairly simple.
2570 while (Tok) {
2571 if (Tok->is(tok::r_square)) {
2572 break;
2573 }
2574 Tok = Tokens->getNextToken();
2575 }
2576 Tok = Tokens->getNextToken();
2577 if (Tok && !Tok->is(tok::r_square)) {
2578 return false;
2579 }
2580 Tok = Tokens->getNextToken();
2581 if (Tok && Tok->is(tok::semi)) {
2582 return false;
2583 }
2584 return true;
2585 }
2586
parseJavaEnumBody()2587 void UnwrappedLineParser::parseJavaEnumBody() {
2588 // Determine whether the enum is simple, i.e. does not have a semicolon or
2589 // constants with class bodies. Simple enums can be formatted like braced
2590 // lists, contracted to a single line, etc.
2591 unsigned StoredPosition = Tokens->getPosition();
2592 bool IsSimple = true;
2593 FormatToken *Tok = Tokens->getNextToken();
2594 while (Tok) {
2595 if (Tok->is(tok::r_brace))
2596 break;
2597 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2598 IsSimple = false;
2599 break;
2600 }
2601 // FIXME: This will also mark enums with braces in the arguments to enum
2602 // constants as "not simple". This is probably fine in practice, though.
2603 Tok = Tokens->getNextToken();
2604 }
2605 FormatTok = Tokens->setPosition(StoredPosition);
2606
2607 if (IsSimple) {
2608 nextToken();
2609 parseBracedList();
2610 addUnwrappedLine();
2611 return;
2612 }
2613
2614 // Parse the body of a more complex enum.
2615 // First add a line for everything up to the "{".
2616 nextToken();
2617 addUnwrappedLine();
2618 ++Line->Level;
2619
2620 // Parse the enum constants.
2621 while (FormatTok) {
2622 if (FormatTok->is(tok::l_brace)) {
2623 // Parse the constant's class body.
2624 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2625 /*MunchSemi=*/false);
2626 } else if (FormatTok->is(tok::l_paren)) {
2627 parseParens();
2628 } else if (FormatTok->is(tok::comma)) {
2629 nextToken();
2630 addUnwrappedLine();
2631 } else if (FormatTok->is(tok::semi)) {
2632 nextToken();
2633 addUnwrappedLine();
2634 break;
2635 } else if (FormatTok->is(tok::r_brace)) {
2636 addUnwrappedLine();
2637 break;
2638 } else {
2639 nextToken();
2640 }
2641 }
2642
2643 // Parse the class body after the enum's ";" if any.
2644 parseLevel(/*HasOpeningBrace=*/true);
2645 nextToken();
2646 --Line->Level;
2647 addUnwrappedLine();
2648 }
2649
// Parses a record (class-like) declaration or definition, starting at its
// introducing keyword: the name part, an optional base/template part, and the
// body if one follows.
//
// ParseAsExpr: when true the body is parsed as a child block instead of a
// regular block (parseParens() passes true for JavaScript "class" inside
// parentheses).
//
// No unwrapped line is added at the end; see the comment at the bottom.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remember the introducing token for the brace-breaking decision below.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier whose text equals its upper-cased form is treated as a
    // possible macro; anything else that is an identifier is not.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  //   class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip forward to the record body, a ";" or end of file.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A "{" that is not a braced list is taken to be the record body.
        if (!tryToParseBracedList())
          break;
      }
      // A ";" means there is no body to parse.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // A C# "where" starts a new line and is parsed as a generic type
        // constraint.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // Members are indented by two levels instead of one when access
      // modifiers get their own indentation level.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2734
parseObjCMethod()2735 void UnwrappedLineParser::parseObjCMethod() {
2736 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2737 "'(' or identifier expected.");
2738 do {
2739 if (FormatTok->Tok.is(tok::semi)) {
2740 nextToken();
2741 addUnwrappedLine();
2742 return;
2743 } else if (FormatTok->Tok.is(tok::l_brace)) {
2744 if (Style.BraceWrapping.AfterFunction)
2745 addUnwrappedLine();
2746 parseBlock(/*MustBeDeclaration=*/false);
2747 addUnwrappedLine();
2748 return;
2749 } else {
2750 nextToken();
2751 }
2752 } while (!eof());
2753 }
2754
parseObjCProtocolList()2755 void UnwrappedLineParser::parseObjCProtocolList() {
2756 assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2757 do {
2758 nextToken();
2759 // Early exit in case someone forgot a close angle.
2760 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2761 FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2762 return;
2763 } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2764 nextToken(); // Skip '>'.
2765 }
2766
parseObjCUntilAtEnd()2767 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2768 do {
2769 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2770 nextToken();
2771 addUnwrappedLine();
2772 break;
2773 }
2774 if (FormatTok->is(tok::l_brace)) {
2775 parseBlock(/*MustBeDeclaration=*/false);
2776 // In ObjC interfaces, nothing should be following the "}".
2777 addUnwrappedLine();
2778 } else if (FormatTok->is(tok::r_brace)) {
2779 // Ignore stray "}". parseStructuralElement doesn't consume them.
2780 nextToken();
2781 addUnwrappedLine();
2782 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2783 nextToken();
2784 parseObjCMethod();
2785 } else {
2786 parseStructuralElement();
2787 }
2788 } while (!eof());
2789 }
2790
parseObjCInterfaceOrImplementation()2791 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2792 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2793 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2794 nextToken();
2795 nextToken(); // interface name
2796
2797 // @interface can be followed by a lightweight generic
2798 // specialization list, then either a base class or a category.
2799 if (FormatTok->Tok.is(tok::less)) {
2800 parseObjCLightweightGenerics();
2801 }
2802 if (FormatTok->Tok.is(tok::colon)) {
2803 nextToken();
2804 nextToken(); // base class name
2805 // The base class can also have lightweight generics applied to it.
2806 if (FormatTok->Tok.is(tok::less)) {
2807 parseObjCLightweightGenerics();
2808 }
2809 } else if (FormatTok->Tok.is(tok::l_paren))
2810 // Skip category, if present.
2811 parseParens();
2812
2813 if (FormatTok->Tok.is(tok::less))
2814 parseObjCProtocolList();
2815
2816 if (FormatTok->Tok.is(tok::l_brace)) {
2817 if (Style.BraceWrapping.AfterObjCDeclaration)
2818 addUnwrappedLine();
2819 parseBlock(/*MustBeDeclaration=*/true);
2820 }
2821
2822 // With instance variables, this puts '}' on its own line. Without instance
2823 // variables, this ends the @interface line.
2824 addUnwrappedLine();
2825
2826 parseObjCUntilAtEnd();
2827 }
2828
parseObjCLightweightGenerics()2829 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2830 assert(FormatTok->Tok.is(tok::less));
2831 // Unlike protocol lists, generic parameterizations support
2832 // nested angles:
2833 //
2834 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2835 // NSObject <NSCopying, NSSecureCoding>
2836 //
2837 // so we need to count how many open angles we have left.
2838 unsigned NumOpenAngles = 1;
2839 do {
2840 nextToken();
2841 // Early exit in case someone forgot a close angle.
2842 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2843 FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2844 break;
2845 if (FormatTok->Tok.is(tok::less))
2846 ++NumOpenAngles;
2847 else if (FormatTok->Tok.is(tok::greater)) {
2848 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2849 --NumOpenAngles;
2850 }
2851 } while (!eof() && NumOpenAngles != 0);
2852 nextToken(); // Skip '>'.
2853 }
2854
2855 // Returns true for the declaration/definition form of @protocol,
2856 // false for the expression form.
parseObjCProtocol()2857 bool UnwrappedLineParser::parseObjCProtocol() {
2858 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2859 nextToken();
2860
2861 if (FormatTok->is(tok::l_paren))
2862 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2863 return false;
2864
2865 // The definition/declaration form,
2866 // @protocol Foo
2867 // - (int)someMethod;
2868 // @end
2869
2870 nextToken(); // protocol name
2871
2872 if (FormatTok->Tok.is(tok::less))
2873 parseObjCProtocolList();
2874
2875 // Check for protocol declaration.
2876 if (FormatTok->Tok.is(tok::semi)) {
2877 nextToken();
2878 addUnwrappedLine();
2879 return true;
2880 }
2881
2882 addUnwrappedLine();
2883 parseObjCUntilAtEnd();
2884 return true;
2885 }
2886
parseJavaScriptEs6ImportExport()2887 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2888 bool IsImport = FormatTok->is(Keywords.kw_import);
2889 assert(IsImport || FormatTok->is(tok::kw_export));
2890 nextToken();
2891
2892 // Consume the "default" in "export default class/function".
2893 if (FormatTok->is(tok::kw_default))
2894 nextToken();
2895
2896 // Consume "async function", "function" and "default function", so that these
2897 // get parsed as free-standing JS functions, i.e. do not require a trailing
2898 // semicolon.
2899 if (FormatTok->is(Keywords.kw_async))
2900 nextToken();
2901 if (FormatTok->is(Keywords.kw_function)) {
2902 nextToken();
2903 return;
2904 }
2905
2906 // For imports, `export *`, `export {...}`, consume the rest of the line up
2907 // to the terminating `;`. For everything else, just return and continue
2908 // parsing the structural element, i.e. the declaration or expression for
2909 // `export default`.
2910 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2911 !FormatTok->isStringLiteral())
2912 return;
2913
2914 while (!eof()) {
2915 if (FormatTok->is(tok::semi))
2916 return;
2917 if (Line->Tokens.empty()) {
2918 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2919 // import statement should terminate.
2920 return;
2921 }
2922 if (FormatTok->is(tok::l_brace)) {
2923 FormatTok->setBlockKind(BK_Block);
2924 nextToken();
2925 parseBracedList();
2926 } else {
2927 nextToken();
2928 }
2929 }
2930 }
2931
parseStatementMacro()2932 void UnwrappedLineParser::parseStatementMacro() {
2933 nextToken();
2934 if (FormatTok->is(tok::l_paren))
2935 parseParens();
2936 if (FormatTok->is(tok::semi))
2937 nextToken();
2938 addUnwrappedLine();
2939 }
2940
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")2941 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2942 StringRef Prefix = "") {
2943 llvm::dbgs() << Prefix << "Line(" << Line.Level
2944 << ", FSC=" << Line.FirstStartColumn << ")"
2945 << (Line.InPPDirective ? " MACRO" : "") << ": ";
2946 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2947 E = Line.Tokens.end();
2948 I != E; ++I) {
2949 llvm::dbgs() << I->Tok->Tok.getName() << "["
2950 << "T=" << (unsigned)I->Tok->getType()
2951 << ", OC=" << I->Tok->OriginalColumn << "] ";
2952 }
2953 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2954 E = Line.Tokens.end();
2955 I != E; ++I) {
2956 const UnwrappedLineNode &Node = *I;
2957 for (SmallVectorImpl<UnwrappedLine>::const_iterator
2958 I = Node.Children.begin(),
2959 E = Node.Children.end();
2960 I != E; ++I) {
2961 printDebugInfo(*I, "\nChild: ");
2962 }
2963 }
2964 llvm::dbgs() << "\n";
2965 }
2966
addUnwrappedLine(LineLevel AdjustLevel)2967 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
2968 if (Line->Tokens.empty())
2969 return;
2970 LLVM_DEBUG({
2971 if (CurrentLines == &Lines)
2972 printDebugInfo(*Line);
2973 });
2974
2975 // If this line closes a block when in Whitesmiths mode, remember that
2976 // information so that the level can be decreased after the line is added.
2977 // This has to happen after the addition of the line since the line itself
2978 // needs to be indented.
2979 bool ClosesWhitesmithsBlock =
2980 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
2981 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2982
2983 CurrentLines->push_back(std::move(*Line));
2984 Line->Tokens.clear();
2985 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2986 Line->FirstStartColumn = 0;
2987
2988 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
2989 --Line->Level;
2990 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2991 CurrentLines->append(
2992 std::make_move_iterator(PreprocessorDirectives.begin()),
2993 std::make_move_iterator(PreprocessorDirectives.end()));
2994 PreprocessorDirectives.clear();
2995 }
2996 // Disconnect the current token from the last token on the previous line.
2997 FormatTok->Previous = nullptr;
2998 }
2999
eof() const3000 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3001
isOnNewLine(const FormatToken & FormatTok)3002 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3003 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3004 FormatTok.NewlinesBefore > 0;
3005 }
3006
3007 // Checks if \p FormatTok is a line comment that continues the line comment
3008 // section on \p Line.
3009 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)3010 continuesLineCommentSection(const FormatToken &FormatTok,
3011 const UnwrappedLine &Line,
3012 const llvm::Regex &CommentPragmasRegex) {
3013 if (Line.Tokens.empty())
3014 return false;
3015
3016 StringRef IndentContent = FormatTok.TokenText;
3017 if (FormatTok.TokenText.startswith("//") ||
3018 FormatTok.TokenText.startswith("/*"))
3019 IndentContent = FormatTok.TokenText.substr(2);
3020 if (CommentPragmasRegex.match(IndentContent))
3021 return false;
3022
3023 // If Line starts with a line comment, then FormatTok continues the comment
3024 // section if its original column is greater or equal to the original start
3025 // column of the line.
3026 //
3027 // Define the min column token of a line as follows: if a line ends in '{' or
3028 // contains a '{' followed by a line comment, then the min column token is
3029 // that '{'. Otherwise, the min column token of the line is the first token of
3030 // the line.
3031 //
3032 // If Line starts with a token other than a line comment, then FormatTok
3033 // continues the comment section if its original column is greater than the
3034 // original start column of the min column token of the line.
3035 //
3036 // For example, the second line comment continues the first in these cases:
3037 //
3038 // // first line
3039 // // second line
3040 //
3041 // and:
3042 //
3043 // // first line
3044 // // second line
3045 //
3046 // and:
3047 //
3048 // int i; // first line
3049 // // second line
3050 //
3051 // and:
3052 //
3053 // do { // first line
3054 // // second line
3055 // int i;
3056 // } while (true);
3057 //
3058 // and:
3059 //
3060 // enum {
3061 // a, // first line
3062 // // second line
3063 // b
3064 // };
3065 //
3066 // The second line comment doesn't continue the first in these cases:
3067 //
3068 // // first line
3069 // // second line
3070 //
3071 // and:
3072 //
3073 // int i; // first line
3074 // // second line
3075 //
3076 // and:
3077 //
3078 // do { // first line
3079 // // second line
3080 // int i;
3081 // } while (true);
3082 //
3083 // and:
3084 //
3085 // enum {
3086 // a, // first line
3087 // // second line
3088 // };
3089 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3090
3091 // Scan for '{//'. If found, use the column of '{' as a min column for line
3092 // comment section continuation.
3093 const FormatToken *PreviousToken = nullptr;
3094 for (const UnwrappedLineNode &Node : Line.Tokens) {
3095 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3096 isLineComment(*Node.Tok)) {
3097 MinColumnToken = PreviousToken;
3098 break;
3099 }
3100 PreviousToken = Node.Tok;
3101
3102 // Grab the last newline preceding a token in this unwrapped line.
3103 if (Node.Tok->NewlinesBefore > 0) {
3104 MinColumnToken = Node.Tok;
3105 }
3106 }
3107 if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3108 MinColumnToken = PreviousToken;
3109 }
3110
3111 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3112 MinColumnToken);
3113 }
3114
flushComments(bool NewlineBeforeNext)3115 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3116 bool JustComments = Line->Tokens.empty();
3117 for (SmallVectorImpl<FormatToken *>::const_iterator
3118 I = CommentsBeforeNextToken.begin(),
3119 E = CommentsBeforeNextToken.end();
3120 I != E; ++I) {
3121 // Line comments that belong to the same line comment section are put on the
3122 // same line since later we might want to reflow content between them.
3123 // Additional fine-grained breaking of line comment sections is controlled
3124 // by the class BreakableLineCommentSection in case it is desirable to keep
3125 // several line comment sections in the same unwrapped line.
3126 //
3127 // FIXME: Consider putting separate line comment sections as children to the
3128 // unwrapped line instead.
3129 (*I)->ContinuesLineCommentSection =
3130 continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3131 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3132 addUnwrappedLine();
3133 pushToken(*I);
3134 }
3135 if (NewlineBeforeNext && JustComments)
3136 addUnwrappedLine();
3137 CommentsBeforeNextToken.clear();
3138 }
3139
nextToken(int LevelDifference)3140 void UnwrappedLineParser::nextToken(int LevelDifference) {
3141 if (eof())
3142 return;
3143 flushComments(isOnNewLine(*FormatTok));
3144 pushToken(FormatTok);
3145 FormatToken *Previous = FormatTok;
3146 if (Style.Language != FormatStyle::LK_JavaScript)
3147 readToken(LevelDifference);
3148 else
3149 readTokenWithJavaScriptASI();
3150 FormatTok->Previous = Previous;
3151 }
3152
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)3153 void UnwrappedLineParser::distributeComments(
3154 const SmallVectorImpl<FormatToken *> &Comments,
3155 const FormatToken *NextTok) {
3156 // Whether or not a line comment token continues a line is controlled by
3157 // the method continuesLineCommentSection, with the following caveat:
3158 //
3159 // Define a trail of Comments to be a nonempty proper postfix of Comments such
3160 // that each comment line from the trail is aligned with the next token, if
3161 // the next token exists. If a trail exists, the beginning of the maximal
3162 // trail is marked as a start of a new comment section.
3163 //
3164 // For example in this code:
3165 //
3166 // int a; // line about a
3167 // // line 1 about b
3168 // // line 2 about b
3169 // int b;
3170 //
3171 // the two lines about b form a maximal trail, so there are two sections, the
3172 // first one consisting of the single comment "// line about a" and the
3173 // second one consisting of the next two comments.
3174 if (Comments.empty())
3175 return;
3176 bool ShouldPushCommentsInCurrentLine = true;
3177 bool HasTrailAlignedWithNextToken = false;
3178 unsigned StartOfTrailAlignedWithNextToken = 0;
3179 if (NextTok) {
3180 // We are skipping the first element intentionally.
3181 for (unsigned i = Comments.size() - 1; i > 0; --i) {
3182 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3183 HasTrailAlignedWithNextToken = true;
3184 StartOfTrailAlignedWithNextToken = i;
3185 }
3186 }
3187 }
3188 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3189 FormatToken *FormatTok = Comments[i];
3190 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3191 FormatTok->ContinuesLineCommentSection = false;
3192 } else {
3193 FormatTok->ContinuesLineCommentSection =
3194 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3195 }
3196 if (!FormatTok->ContinuesLineCommentSection &&
3197 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3198 ShouldPushCommentsInCurrentLine = false;
3199 }
3200 if (ShouldPushCommentsInCurrentLine) {
3201 pushToken(FormatTok);
3202 } else {
3203 CommentsBeforeNextToken.push_back(FormatTok);
3204 }
3205 }
3206 }
3207
readToken(int LevelDifference)3208 void UnwrappedLineParser::readToken(int LevelDifference) {
3209 SmallVector<FormatToken *, 1> Comments;
3210 do {
3211 FormatTok = Tokens->getNextToken();
3212 assert(FormatTok);
3213 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3214 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3215 distributeComments(Comments, FormatTok);
3216 Comments.clear();
3217 // If there is an unfinished unwrapped line, we flush the preprocessor
3218 // directives only after that unwrapped line was finished later.
3219 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3220 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3221 assert((LevelDifference >= 0 ||
3222 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3223 "LevelDifference makes Line->Level negative");
3224 Line->Level += LevelDifference;
3225 // Comments stored before the preprocessor directive need to be output
3226 // before the preprocessor directive, at the same level as the
3227 // preprocessor directive, as we consider them to apply to the directive.
3228 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3229 PPBranchLevel > 0)
3230 Line->Level += PPBranchLevel;
3231 flushComments(isOnNewLine(*FormatTok));
3232 parsePPDirective();
3233 }
3234 while (FormatTok->getType() == TT_ConflictStart ||
3235 FormatTok->getType() == TT_ConflictEnd ||
3236 FormatTok->getType() == TT_ConflictAlternative) {
3237 if (FormatTok->getType() == TT_ConflictStart) {
3238 conditionalCompilationStart(/*Unreachable=*/false);
3239 } else if (FormatTok->getType() == TT_ConflictAlternative) {
3240 conditionalCompilationAlternative();
3241 } else if (FormatTok->getType() == TT_ConflictEnd) {
3242 conditionalCompilationEnd();
3243 }
3244 FormatTok = Tokens->getNextToken();
3245 FormatTok->MustBreakBefore = true;
3246 }
3247
3248 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3249 !Line->InPPDirective) {
3250 continue;
3251 }
3252
3253 if (!FormatTok->Tok.is(tok::comment)) {
3254 distributeComments(Comments, FormatTok);
3255 Comments.clear();
3256 return;
3257 }
3258
3259 Comments.push_back(FormatTok);
3260 } while (!eof());
3261
3262 distributeComments(Comments, nullptr);
3263 Comments.clear();
3264 }
3265
pushToken(FormatToken * Tok)3266 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3267 Line->Tokens.push_back(UnwrappedLineNode(Tok));
3268 if (MustBreakBeforeNextToken) {
3269 Line->Tokens.back().Tok->MustBreakBefore = true;
3270 MustBreakBeforeNextToken = false;
3271 }
3272 }
3273
3274 } // end namespace format
3275 } // end namespace clang
3276