xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/UnwrappedLineParser.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31 
32   // Returns the next token in the token stream.
33   virtual FormatToken *getNextToken() = 0;
34 
35   // Returns the token precedint the token returned by the last call to
36   // getNextToken() in the token stream, or nullptr if no such token exists.
37   virtual FormatToken *getPreviousToken() = 0;
38 
39   // Returns the token that would be returned by the next call to
40   // getNextToken().
41   virtual FormatToken *peekNextToken() = 0;
42 
43   // Returns whether we are at the end of the file.
44   // This can be different from whether getNextToken() returned an eof token
45   // when the FormatTokenSource is a view on a part of the token stream.
46   virtual bool isEOF() = 0;
47 
48   // Gets the current position in the token stream, to be used by setPosition().
49   virtual unsigned getPosition() = 0;
50 
51   // Resets the token stream to the state it was in when getPosition() returned
52   // Position, and return the token at that position in the stream.
53   virtual FormatToken *setPosition(unsigned Position) = 0;
54 };
55 
56 namespace {
57 
58 class ScopedDeclarationState {
59 public:
60   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
61                          bool MustBeDeclaration)
62       : Line(Line), Stack(Stack) {
63     Line.MustBeDeclaration = MustBeDeclaration;
64     Stack.push_back(MustBeDeclaration);
65   }
66   ~ScopedDeclarationState() {
67     Stack.pop_back();
68     if (!Stack.empty())
69       Line.MustBeDeclaration = Stack.back();
70     else
71       Line.MustBeDeclaration = true;
72   }
73 
74 private:
75   UnwrappedLine &Line;
76   std::vector<bool> &Stack;
77 };
78 
79 static bool isLineComment(const FormatToken &FormatTok) {
80   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
81 }
82 
83 // Checks if \p FormatTok is a line comment that continues the line comment
84 // \p Previous. The original column of \p MinColumnToken is used to determine
85 // whether \p FormatTok is indented enough to the right to continue \p Previous.
86 static bool continuesLineComment(const FormatToken &FormatTok,
87                                  const FormatToken *Previous,
88                                  const FormatToken *MinColumnToken) {
89   if (!Previous || !MinColumnToken)
90     return false;
91   unsigned MinContinueColumn =
92       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
93   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
94          isLineComment(*Previous) &&
95          FormatTok.OriginalColumn >= MinContinueColumn;
96 }
97 
98 class ScopedMacroState : public FormatTokenSource {
99 public:
100   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
101                    FormatToken *&ResetToken)
102       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
103         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
104         Token(nullptr), PreviousToken(nullptr) {
105     FakeEOF.Tok.startToken();
106     FakeEOF.Tok.setKind(tok::eof);
107     TokenSource = this;
108     Line.Level = 0;
109     Line.InPPDirective = true;
110   }
111 
112   ~ScopedMacroState() override {
113     TokenSource = PreviousTokenSource;
114     ResetToken = Token;
115     Line.InPPDirective = false;
116     Line.Level = PreviousLineLevel;
117   }
118 
119   FormatToken *getNextToken() override {
120     // The \c UnwrappedLineParser guards against this by never calling
121     // \c getNextToken() after it has encountered the first eof token.
122     assert(!eof());
123     PreviousToken = Token;
124     Token = PreviousTokenSource->getNextToken();
125     if (eof())
126       return &FakeEOF;
127     return Token;
128   }
129 
130   FormatToken *getPreviousToken() override {
131     return PreviousTokenSource->getPreviousToken();
132   }
133 
134   FormatToken *peekNextToken() override {
135     if (eof())
136       return &FakeEOF;
137     return PreviousTokenSource->peekNextToken();
138   }
139 
140   bool isEOF() override { return PreviousTokenSource->isEOF(); }
141 
142   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
143 
144   FormatToken *setPosition(unsigned Position) override {
145     PreviousToken = nullptr;
146     Token = PreviousTokenSource->setPosition(Position);
147     return Token;
148   }
149 
150 private:
151   bool eof() {
152     return Token && Token->HasUnescapedNewline &&
153            !continuesLineComment(*Token, PreviousToken,
154                                  /*MinColumnToken=*/PreviousToken);
155   }
156 
157   FormatToken FakeEOF;
158   UnwrappedLine &Line;
159   FormatTokenSource *&TokenSource;
160   FormatToken *&ResetToken;
161   unsigned PreviousLineLevel;
162   FormatTokenSource *PreviousTokenSource;
163 
164   FormatToken *Token;
165   FormatToken *PreviousToken;
166 };
167 
168 } // end anonymous namespace
169 
170 class ScopedLineState {
171 public:
172   ScopedLineState(UnwrappedLineParser &Parser,
173                   bool SwitchToPreprocessorLines = false)
174       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
175     if (SwitchToPreprocessorLines)
176       Parser.CurrentLines = &Parser.PreprocessorDirectives;
177     else if (!Parser.Line->Tokens.empty())
178       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
179     PreBlockLine = std::move(Parser.Line);
180     Parser.Line = std::make_unique<UnwrappedLine>();
181     Parser.Line->Level = PreBlockLine->Level;
182     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
183   }
184 
185   ~ScopedLineState() {
186     if (!Parser.Line->Tokens.empty()) {
187       Parser.addUnwrappedLine();
188     }
189     assert(Parser.Line->Tokens.empty());
190     Parser.Line = std::move(PreBlockLine);
191     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
192       Parser.MustBreakBeforeNextToken = true;
193     Parser.CurrentLines = OriginalLines;
194   }
195 
196 private:
197   UnwrappedLineParser &Parser;
198 
199   std::unique_ptr<UnwrappedLine> PreBlockLine;
200   SmallVectorImpl<UnwrappedLine> *OriginalLines;
201 };
202 
203 class CompoundStatementIndenter {
204 public:
205   CompoundStatementIndenter(UnwrappedLineParser *Parser,
206                             const FormatStyle &Style, unsigned &LineLevel)
207       : CompoundStatementIndenter(Parser, LineLevel,
208                                   Style.BraceWrapping.AfterControlStatement,
209                                   Style.BraceWrapping.IndentBraces) {}
210   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
211                             bool WrapBrace, bool IndentBrace)
212       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
213     if (WrapBrace)
214       Parser->addUnwrappedLine();
215     if (IndentBrace)
216       ++LineLevel;
217   }
218   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
219 
220 private:
221   unsigned &LineLevel;
222   unsigned OldLineLevel;
223 };
224 
225 namespace {
226 
227 class IndexedTokenSource : public FormatTokenSource {
228 public:
229   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
230       : Tokens(Tokens), Position(-1) {}
231 
232   FormatToken *getNextToken() override {
233     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
234       LLVM_DEBUG({
235         llvm::dbgs() << "Next ";
236         dbgToken(Position);
237       });
238       return Tokens[Position];
239     }
240     ++Position;
241     LLVM_DEBUG({
242       llvm::dbgs() << "Next ";
243       dbgToken(Position);
244     });
245     return Tokens[Position];
246   }
247 
248   FormatToken *getPreviousToken() override {
249     assert(Position > 0);
250     return Tokens[Position - 1];
251   }
252 
253   FormatToken *peekNextToken() override {
254     int Next = Position + 1;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Peeking ";
257       dbgToken(Next);
258     });
259     return Tokens[Next];
260   }
261 
262   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
263 
264   unsigned getPosition() override {
265     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
266     assert(Position >= 0);
267     return Position;
268   }
269 
270   FormatToken *setPosition(unsigned P) override {
271     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
272     Position = P;
273     return Tokens[Position];
274   }
275 
276   void reset() { Position = -1; }
277 
278 private:
279   void dbgToken(int Position, llvm::StringRef Indent = "") {
280     FormatToken *Tok = Tokens[Position];
281     llvm::dbgs() << Indent << "[" << Position
282                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
283                  << ", Macro: " << !!Tok->MacroCtx << "\n";
284   }
285 
286   ArrayRef<FormatToken *> Tokens;
287   int Position;
288 };
289 
290 } // end anonymous namespace
291 
// Sets up a parser over \p Tokens (stored as AllTokens). Nothing is parsed
// here: the token source pointer (the Tokens member) starts out null and the
// output goes to the parser's own Lines container by default.
// Include-guard detection starts in the IG_Rejected state when
// Style.IndentPPDirectives is PPDIS_None, and in IG_Inited otherwise.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
305 
306 void UnwrappedLineParser::reset() {
307   PPBranchLevel = -1;
308   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
309                      ? IG_Rejected
310                      : IG_Inited;
311   IncludeGuardToken = nullptr;
312   Line.reset(new UnwrappedLine);
313   CommentsBeforeNextToken.clear();
314   FormatTok = nullptr;
315   MustBreakBeforeNextToken = false;
316   PreprocessorDirectives.clear();
317   CurrentLines = &Lines;
318   DeclarationScopeStack.clear();
319   PPStack.clear();
320   Line->FirstStartColumn = FirstStartColumn;
321 }
322 
323 void UnwrappedLineParser::parse() {
324   IndexedTokenSource TokenSource(AllTokens);
325   Line->FirstStartColumn = FirstStartColumn;
326   do {
327     LLVM_DEBUG(llvm::dbgs() << "----\n");
328     reset();
329     Tokens = &TokenSource;
330     TokenSource.reset();
331 
332     readToken();
333     parseFile();
334 
335     // If we found an include guard then all preprocessor directives (other than
336     // the guard) are over-indented by one.
337     if (IncludeGuard == IG_Found)
338       for (auto &Line : Lines)
339         if (Line.InPPDirective && Line.Level > 0)
340           --Line.Level;
341 
342     // Create line with eof token.
343     pushToken(FormatTok);
344     addUnwrappedLine();
345 
346     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
347                                                   E = Lines.end();
348          I != E; ++I) {
349       Callback.consumeUnwrappedLine(*I);
350     }
351     Callback.finishRun();
352     Lines.clear();
353     while (!PPLevelBranchIndex.empty() &&
354            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
355       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
356       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
357     }
358     if (!PPLevelBranchIndex.empty()) {
359       ++PPLevelBranchIndex.back();
360       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
361       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
362     }
363   } while (!PPLevelBranchIndex.empty());
364 }
365 
366 void UnwrappedLineParser::parseFile() {
367   // The top-level context in a file always has declarations, except for pre-
368   // processor directives and JavaScript files.
369   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
370   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
371                                           MustBeDeclaration);
372   if (Style.Language == FormatStyle::LK_TextProto)
373     parseBracedList();
374   else
375     parseLevel(/*HasOpeningBrace=*/false);
376   // Make sure to format the remaining tokens.
377   //
378   // LK_TextProto is special since its top-level is parsed as the body of a
379   // braced list, which does not necessarily have natural line separators such
380   // as a semicolon. Comments after the last entry that have been determined to
381   // not belong to that line, as in:
382   //   key: value
383   //   // endfile comment
384   // do not have a chance to be put on a line of their own until this point.
385   // Here we add this newline before end-of-file comments.
386   if (Style.Language == FormatStyle::LK_TextProto &&
387       !CommentsBeforeNextToken.empty())
388     addUnwrappedLine();
389   flushComments(true);
390   addUnwrappedLine();
391 }
392 
393 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
394   do {
395     switch (FormatTok->Tok.getKind()) {
396     case tok::l_brace:
397       return;
398     default:
399       if (FormatTok->is(Keywords.kw_where)) {
400         addUnwrappedLine();
401         nextToken();
402         parseCSharpGenericTypeConstraint();
403         break;
404       }
405       nextToken();
406       break;
407     }
408   } while (!eof());
409 }
410 
411 void UnwrappedLineParser::parseCSharpAttribute() {
412   int UnpairedSquareBrackets = 1;
413   do {
414     switch (FormatTok->Tok.getKind()) {
415     case tok::r_square:
416       nextToken();
417       --UnpairedSquareBrackets;
418       if (UnpairedSquareBrackets == 0) {
419         addUnwrappedLine();
420         return;
421       }
422       break;
423     case tok::l_square:
424       ++UnpairedSquareBrackets;
425       nextToken();
426       break;
427     default:
428       nextToken();
429       break;
430     }
431   } while (!eof());
432 }
433 
434 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
435   bool SwitchLabelEncountered = false;
436   do {
437     tok::TokenKind kind = FormatTok->Tok.getKind();
438     if (FormatTok->getType() == TT_MacroBlockBegin) {
439       kind = tok::l_brace;
440     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
441       kind = tok::r_brace;
442     }
443 
444     switch (kind) {
445     case tok::comment:
446       nextToken();
447       addUnwrappedLine();
448       break;
449     case tok::l_brace:
450       // FIXME: Add parameter whether this can happen - if this happens, we must
451       // be in a non-declaration context.
452       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
453         continue;
454       parseBlock();
455       addUnwrappedLine();
456       break;
457     case tok::r_brace:
458       if (HasOpeningBrace)
459         return;
460       nextToken();
461       addUnwrappedLine();
462       break;
463     case tok::kw_default: {
464       unsigned StoredPosition = Tokens->getPosition();
465       FormatToken *Next;
466       do {
467         Next = Tokens->getNextToken();
468       } while (Next->is(tok::comment));
469       FormatTok = Tokens->setPosition(StoredPosition);
470       if (Next && Next->isNot(tok::colon)) {
471         // default not followed by ':' is not a case label; treat it like
472         // an identifier.
473         parseStructuralElement();
474         break;
475       }
476       // Else, if it is 'default:', fall through to the case handling.
477       LLVM_FALLTHROUGH;
478     }
479     case tok::kw_case:
480       if (Style.isJavaScript() && Line->MustBeDeclaration) {
481         // A 'case: string' style field declaration.
482         parseStructuralElement();
483         break;
484       }
485       if (!SwitchLabelEncountered &&
486           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
487         ++Line->Level;
488       SwitchLabelEncountered = true;
489       parseStructuralElement();
490       break;
491     case tok::l_square:
492       if (Style.isCSharp()) {
493         nextToken();
494         parseCSharpAttribute();
495         break;
496       }
497       LLVM_FALLTHROUGH;
498     default:
499       parseStructuralElement(!HasOpeningBrace);
500       break;
501     }
502   } while (!eof());
503 }
504 
505 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
506   // We'll parse forward through the tokens until we hit
507   // a closing brace or eof - note that getNextToken() will
508   // parse macros, so this will magically work inside macro
509   // definitions, too.
510   unsigned StoredPosition = Tokens->getPosition();
511   FormatToken *Tok = FormatTok;
512   const FormatToken *PrevTok = Tok->Previous;
513   // Keep a stack of positions of lbrace tokens. We will
514   // update information about whether an lbrace starts a
515   // braced init list or a different block during the loop.
516   SmallVector<FormatToken *, 8> LBraceStack;
517   assert(Tok->Tok.is(tok::l_brace));
518   do {
519     // Get next non-comment token.
520     FormatToken *NextTok;
521     unsigned ReadTokens = 0;
522     do {
523       NextTok = Tokens->getNextToken();
524       ++ReadTokens;
525     } while (NextTok->is(tok::comment));
526 
527     switch (Tok->Tok.getKind()) {
528     case tok::l_brace:
529       if (Style.isJavaScript() && PrevTok) {
530         if (PrevTok->isOneOf(tok::colon, tok::less))
531           // A ':' indicates this code is in a type, or a braced list
532           // following a label in an object literal ({a: {b: 1}}).
533           // A '<' could be an object used in a comparison, but that is nonsense
534           // code (can never return true), so more likely it is a generic type
535           // argument (`X<{a: string; b: number}>`).
536           // The code below could be confused by semicolons between the
537           // individual members in a type member list, which would normally
538           // trigger BK_Block. In both cases, this must be parsed as an inline
539           // braced init.
540           Tok->setBlockKind(BK_BracedInit);
541         else if (PrevTok->is(tok::r_paren))
542           // `) { }` can only occur in function or method declarations in JS.
543           Tok->setBlockKind(BK_Block);
544       } else {
545         Tok->setBlockKind(BK_Unknown);
546       }
547       LBraceStack.push_back(Tok);
548       break;
549     case tok::r_brace:
550       if (LBraceStack.empty())
551         break;
552       if (LBraceStack.back()->is(BK_Unknown)) {
553         bool ProbablyBracedList = false;
554         if (Style.Language == FormatStyle::LK_Proto) {
555           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
556         } else {
557           // Skip NextTok over preprocessor lines, otherwise we may not
558           // properly diagnose the block as a braced intializer
559           // if the comma separator appears after the pp directive.
560           while (NextTok->is(tok::hash)) {
561             ScopedMacroState MacroState(*Line, Tokens, NextTok);
562             do {
563               NextTok = Tokens->getNextToken();
564               ++ReadTokens;
565             } while (NextTok->isNot(tok::eof));
566           }
567 
568           // Using OriginalColumn to distinguish between ObjC methods and
569           // binary operators is a bit hacky.
570           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
571                                   NextTok->OriginalColumn == 0;
572 
573           // If there is a comma, semicolon or right paren after the closing
574           // brace, we assume this is a braced initializer list.  Note that
575           // regardless how we mark inner braces here, we will overwrite the
576           // BlockKind later if we parse a braced list (where all blocks
577           // inside are by default braced lists), or when we explicitly detect
578           // blocks (for example while parsing lambdas).
579           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
580           // braced list in JS.
581           ProbablyBracedList =
582               (Style.isJavaScript() &&
583                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
584                                 Keywords.kw_as)) ||
585               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
586               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
587                                tok::r_paren, tok::r_square, tok::l_brace,
588                                tok::ellipsis) ||
589               (NextTok->is(tok::identifier) &&
590                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
591               (NextTok->is(tok::semi) &&
592                (!ExpectClassBody || LBraceStack.size() != 1)) ||
593               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
594           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
595             // We can have an array subscript after a braced init
596             // list, but C++11 attributes are expected after blocks.
597             NextTok = Tokens->getNextToken();
598             ++ReadTokens;
599             ProbablyBracedList = NextTok->isNot(tok::l_square);
600           }
601         }
602         if (ProbablyBracedList) {
603           Tok->setBlockKind(BK_BracedInit);
604           LBraceStack.back()->setBlockKind(BK_BracedInit);
605         } else {
606           Tok->setBlockKind(BK_Block);
607           LBraceStack.back()->setBlockKind(BK_Block);
608         }
609       }
610       LBraceStack.pop_back();
611       break;
612     case tok::identifier:
613       if (!Tok->is(TT_StatementMacro))
614         break;
615       LLVM_FALLTHROUGH;
616     case tok::at:
617     case tok::semi:
618     case tok::kw_if:
619     case tok::kw_while:
620     case tok::kw_for:
621     case tok::kw_switch:
622     case tok::kw_try:
623     case tok::kw___try:
624       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
625         LBraceStack.back()->setBlockKind(BK_Block);
626       break;
627     default:
628       break;
629     }
630     PrevTok = Tok;
631     Tok = NextTok;
632   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
633 
634   // Assume other blocks for all unclosed opening braces.
635   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
636     if (LBraceStack[i]->is(BK_Unknown))
637       LBraceStack[i]->setBlockKind(BK_Block);
638   }
639 
640   FormatTok = Tokens->setPosition(StoredPosition);
641 }
642 
// Folds std::hash<T>(v) into \p seed in place: the hash of \p v is added to
// the constant 0x9e3779b9 and to two shifted copies of the old seed, and the
// sum is XORed into the seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
648 
649 size_t UnwrappedLineParser::computePPHash() const {
650   size_t h = 0;
651   for (const auto &i : PPStack) {
652     hash_combine(h, size_t(i.Kind));
653     hash_combine(h, i.Line);
654   }
655   return h;
656 }
657 
// Parses a block that starts at the current token, which must be '{' or a
// TT_MacroBlockBegin token (asserted), up to and including its closing token.
//
// \p MustBeDeclaration  declaration-context flag in effect inside the block.
// \p AddLevels          number of levels the block contents are indented by.
// \p MunchSemi          if true, a ';' directly after the block is consumed.
// \p UnindentWhitesmithsBraces  if true, the line level is decremented once
//                       after the opening line has been added.
//
// When the preprocessor branch stack hashes the same at the opening and the
// closing brace, the opening and closing lines are linked to each other via
// their matching-block line indices.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                     bool MunchSemi,
                                     bool UnindentWhitesmithsBraces) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Snapshot of the preprocessor branch stack at the opening brace; compared
  // with the state at the closing brace further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block opener may be followed by a parenthesized argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Index of the opening line within CurrentLines. When writing into Lines,
  // the current number of preprocessor directive lines is subtracted.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;
  parseLevel(/*HasOpeningBrace=*/true);

  // Input ended before the block was closed.
  if (eof())
    return;

  // The token that ended the level does not match the kind of opener (macro
  // block end vs. '}'): restore the level, mark the token as a block brace and
  // leave it unconsumed.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::arrow)) {
    // Following the } we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only link the opening and closing lines if both braces were seen under the
  // same preprocessor branch state.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
738 
739 static bool isGoogScope(const UnwrappedLine &Line) {
740   // FIXME: Closure-library specific stuff should not be hard-coded but be
741   // configurable.
742   if (Line.Tokens.size() < 4)
743     return false;
744   auto I = Line.Tokens.begin();
745   if (I->Tok->TokenText != "goog")
746     return false;
747   ++I;
748   if (I->Tok->isNot(tok::period))
749     return false;
750   ++I;
751   if (I->Tok->TokenText != "scope")
752     return false;
753   ++I;
754   return I->Tok->is(tok::l_paren);
755 }
756 
757 static bool isIIFE(const UnwrappedLine &Line,
758                    const AdditionalKeywords &Keywords) {
759   // Look for the start of an immediately invoked anonymous function.
760   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
761   // This is commonly done in JavaScript to create a new, anonymous scope.
762   // Example: (function() { ... })()
763   if (Line.Tokens.size() < 3)
764     return false;
765   auto I = Line.Tokens.begin();
766   if (I->Tok->isNot(tok::l_paren))
767     return false;
768   ++I;
769   if (I->Tok->isNot(Keywords.kw_function))
770     return false;
771   ++I;
772   return I->Tok->is(tok::l_paren);
773 }
774 
775 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
776                                    const FormatToken &InitialToken) {
777   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
778     return Style.BraceWrapping.AfterNamespace;
779   if (InitialToken.is(tok::kw_class))
780     return Style.BraceWrapping.AfterClass;
781   if (InitialToken.is(tok::kw_union))
782     return Style.BraceWrapping.AfterUnion;
783   if (InitialToken.is(tok::kw_struct))
784     return Style.BraceWrapping.AfterStruct;
785   return false;
786 }
787 
788 void UnwrappedLineParser::parseChildBlock() {
789   FormatTok->setBlockKind(BK_Block);
790   nextToken();
791   {
792     bool SkipIndent = (Style.isJavaScript() &&
793                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
794     ScopedLineState LineState(*this);
795     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
796                                             /*MustBeDeclaration=*/false);
797     Line->Level += SkipIndent ? 0 : 1;
798     parseLevel(/*HasOpeningBrace=*/true);
799     flushComments(isOnNewLine(*FormatTok));
800     Line->Level -= SkipIndent ? 0 : 1;
801   }
802   nextToken();
803 }
804 
805 void UnwrappedLineParser::parsePPDirective() {
806   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
807   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
808 
809   nextToken();
810 
811   if (!FormatTok->Tok.getIdentifierInfo()) {
812     parsePPUnknown();
813     return;
814   }
815 
816   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
817   case tok::pp_define:
818     parsePPDefine();
819     return;
820   case tok::pp_if:
821     parsePPIf(/*IfDef=*/false);
822     break;
823   case tok::pp_ifdef:
824   case tok::pp_ifndef:
825     parsePPIf(/*IfDef=*/true);
826     break;
827   case tok::pp_else:
828     parsePPElse();
829     break;
830   case tok::pp_elifdef:
831   case tok::pp_elifndef:
832   case tok::pp_elif:
833     parsePPElIf();
834     break;
835   case tok::pp_endif:
836     parsePPEndIf();
837     break;
838   default:
839     parsePPUnknown();
840     break;
841   }
842 }
843 
844 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
845   size_t Line = CurrentLines->size();
846   if (CurrentLines == &PreprocessorDirectives)
847     Line += Lines.size();
848 
849   if (Unreachable ||
850       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
851     PPStack.push_back({PP_Unreachable, Line});
852   else
853     PPStack.push_back({PP_Conditional, Line});
854 }
855 
856 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
857   ++PPBranchLevel;
858   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
859   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
860     PPLevelBranchIndex.push_back(0);
861     PPLevelBranchCount.push_back(0);
862   }
863   PPChainBranchIndex.push(0);
864   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
865   conditionalCompilationCondition(Unreachable || Skip);
866 }
867 
868 void UnwrappedLineParser::conditionalCompilationAlternative() {
869   if (!PPStack.empty())
870     PPStack.pop_back();
871   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
872   if (!PPChainBranchIndex.empty())
873     ++PPChainBranchIndex.top();
874   conditionalCompilationCondition(
875       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
876       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
877 }
878 
879 void UnwrappedLineParser::conditionalCompilationEnd() {
880   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
881   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
882     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
883       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
884     }
885   }
886   // Guard against #endif's without #if.
887   if (PPBranchLevel > -1)
888     --PPBranchLevel;
889   if (!PPChainBranchIndex.empty())
890     PPChainBranchIndex.pop();
891   if (!PPStack.empty())
892     PPStack.pop_back();
893 }
894 
895 void UnwrappedLineParser::parsePPIf(bool IfDef) {
896   bool IfNDef = FormatTok->is(tok::pp_ifndef);
897   nextToken();
898   bool Unreachable = false;
899   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
900     Unreachable = true;
901   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
902     Unreachable = true;
903   conditionalCompilationStart(Unreachable);
904   FormatToken *IfCondition = FormatTok;
905   // If there's a #ifndef on the first line, and the only lines before it are
906   // comments, it could be an include guard.
907   bool MaybeIncludeGuard = IfNDef;
908   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
909     for (auto &Line : Lines) {
910       if (!Line.Tokens.front().Tok->is(tok::comment)) {
911         MaybeIncludeGuard = false;
912         IncludeGuard = IG_Rejected;
913         break;
914       }
915     }
916   --PPBranchLevel;
917   parsePPUnknown();
918   ++PPBranchLevel;
919   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
920     IncludeGuard = IG_IfNdefed;
921     IncludeGuardToken = IfCondition;
922   }
923 }
924 
925 void UnwrappedLineParser::parsePPElse() {
926   // If a potential include guard has an #else, it's not an include guard.
927   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
928     IncludeGuard = IG_Rejected;
929   conditionalCompilationAlternative();
930   if (PPBranchLevel > -1)
931     --PPBranchLevel;
932   parsePPUnknown();
933   ++PPBranchLevel;
934 }
935 
936 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
937 
938 void UnwrappedLineParser::parsePPEndIf() {
939   conditionalCompilationEnd();
940   parsePPUnknown();
941   // If the #endif of a potential include guard is the last thing in the file,
942   // then we found an include guard.
943   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
944       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
945     IncludeGuard = IG_Found;
946 }
947 
948 void UnwrappedLineParser::parsePPDefine() {
949   nextToken();
950 
951   if (!FormatTok->Tok.getIdentifierInfo()) {
952     IncludeGuard = IG_Rejected;
953     IncludeGuardToken = nullptr;
954     parsePPUnknown();
955     return;
956   }
957 
958   if (IncludeGuard == IG_IfNdefed &&
959       IncludeGuardToken->TokenText == FormatTok->TokenText) {
960     IncludeGuard = IG_Defined;
961     IncludeGuardToken = nullptr;
962     for (auto &Line : Lines) {
963       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
964         IncludeGuard = IG_Rejected;
965         break;
966       }
967     }
968   }
969 
970   nextToken();
971   if (FormatTok->Tok.getKind() == tok::l_paren &&
972       FormatTok->WhitespaceRange.getBegin() ==
973           FormatTok->WhitespaceRange.getEnd()) {
974     parseParens();
975   }
976   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
977     Line->Level += PPBranchLevel + 1;
978   addUnwrappedLine();
979   ++Line->Level;
980 
981   // Errors during a preprocessor directive can only affect the layout of the
982   // preprocessor directive, and thus we ignore them. An alternative approach
983   // would be to use the same approach we use on the file level (no
984   // re-indentation if there was a structural error) within the macro
985   // definition.
986   parseFile();
987 }
988 
989 void UnwrappedLineParser::parsePPUnknown() {
990   do {
991     nextToken();
992   } while (!eof());
993   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
994     Line->Level += PPBranchLevel + 1;
995   addUnwrappedLine();
996 }
997 
998 // Here we exclude certain tokens that are not usually the first token in an
999 // unwrapped line. This is used in attempt to distinguish macro calls without
1000 // trailing semicolons from other constructs split to several lines.
1001 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1002   // Semicolon can be a null-statement, l_square can be a start of a macro or
1003   // a C++11 attribute, but this doesn't seem to be common.
1004   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1005          Tok.isNot(TT_AttributeSquare) &&
1006          // Tokens that can only be used as binary operators and a part of
1007          // overloaded operator names.
1008          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1009          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1010          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1011          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1012          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1013          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1014          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1015          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1016          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1017          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1018          Tok.isNot(tok::lesslessequal) &&
1019          // Colon is used in labels, base class lists, initializer lists,
1020          // range-based for loops, ternary operator, but should never be the
1021          // first token in an unwrapped line.
1022          Tok.isNot(tok::colon) &&
1023          // 'noexcept' is a trailing annotation.
1024          Tok.isNot(tok::kw_noexcept);
1025 }
1026 
1027 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1028                           const FormatToken *FormatTok) {
1029   // FIXME: This returns true for C/C++ keywords like 'struct'.
1030   return FormatTok->is(tok::identifier) &&
1031          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1032           !FormatTok->isOneOf(
1033               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1034               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1035               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1036               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1037               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1038               Keywords.kw_instanceof, Keywords.kw_interface,
1039               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1040 }
1041 
1042 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1043                                  const FormatToken *FormatTok) {
1044   return FormatTok->Tok.isLiteral() ||
1045          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1046          mustBeJSIdent(Keywords, FormatTok);
1047 }
1048 
1049 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1050 // when encountered after a value (see mustBeJSIdentOrValue).
1051 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1052                            const FormatToken *FormatTok) {
1053   return FormatTok->isOneOf(
1054       tok::kw_return, Keywords.kw_yield,
1055       // conditionals
1056       tok::kw_if, tok::kw_else,
1057       // loops
1058       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1059       // switch/case
1060       tok::kw_switch, tok::kw_case,
1061       // exceptions
1062       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1063       // declaration
1064       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1065       Keywords.kw_async, Keywords.kw_function,
1066       // import/export
1067       Keywords.kw_import, tok::kw_export);
1068 }
1069 
1070 // Checks whether a token is a type in K&R C (aka C78).
1071 static bool isC78Type(const FormatToken &Tok) {
1072   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1073                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1074                      tok::identifier);
1075 }
1076 
1077 // This function checks whether a token starts the first parameter declaration
1078 // in a K&R C (aka C78) function definition, e.g.:
1079 //   int f(a, b)
1080 //   short a, b;
1081 //   {
1082 //      return a + b;
1083 //   }
1084 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1085                                const FormatToken *FuncName) {
1086   assert(Tok);
1087   assert(Next);
1088   assert(FuncName);
1089 
1090   if (FuncName->isNot(tok::identifier))
1091     return false;
1092 
1093   const FormatToken *Prev = FuncName->Previous;
1094   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1095     return false;
1096 
1097   if (!isC78Type(*Tok) &&
1098       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1099     return false;
1100 
1101   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1102     return false;
1103 
1104   Tok = Tok->Previous;
1105   if (!Tok || Tok->isNot(tok::r_paren))
1106     return false;
1107 
1108   Tok = Tok->Previous;
1109   if (!Tok || Tok->isNot(tok::identifier))
1110     return false;
1111 
1112   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1113 }
1114 
1115 void UnwrappedLineParser::parseModuleImport() {
1116   nextToken();
1117   while (!eof()) {
1118     if (FormatTok->is(tok::colon)) {
1119       FormatTok->setType(TT_ModulePartitionColon);
1120     }
1121     // Handle import <foo/bar.h> as we would an include statement.
1122     else if (FormatTok->is(tok::less)) {
1123       nextToken();
1124       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1125         // Mark tokens up to the trailing line comments as implicit string
1126         // literals.
1127         if (FormatTok->isNot(tok::comment) &&
1128             !FormatTok->TokenText.startswith("//"))
1129           FormatTok->setType(TT_ImplicitStringLiteral);
1130         nextToken();
1131       }
1132     }
1133     if (FormatTok->is(tok::semi)) {
1134       nextToken();
1135       break;
1136     }
1137     nextToken();
1138   }
1139 
1140   addUnwrappedLine();
1141 }
1142 
1143 // readTokenWithJavaScriptASI reads the next token and terminates the current
1144 // line if JavaScript Automatic Semicolon Insertion must
1145 // happen between the current token and the next token.
1146 //
1147 // This method is conservative - it cannot cover all edge cases of JavaScript,
1148 // but only aims to correctly handle certain well known cases. It *must not*
1149 // return true in speculative cases.
1150 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1151   FormatToken *Previous = FormatTok;
1152   readToken();
1153   FormatToken *Next = FormatTok;
1154 
1155   bool IsOnSameLine =
1156       CommentsBeforeNextToken.empty()
1157           ? Next->NewlinesBefore == 0
1158           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1159   if (IsOnSameLine)
1160     return;
1161 
1162   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1163   bool PreviousStartsTemplateExpr =
1164       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1165   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1166     // If the line contains an '@' sign, the previous token might be an
1167     // annotation, which can precede another identifier/value.
1168     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1169       return LineNode.Tok->is(tok::at);
1170     });
1171     if (HasAt)
1172       return;
1173   }
1174   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1175     return addUnwrappedLine();
1176   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1177   bool NextEndsTemplateExpr =
1178       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1179   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1180       (PreviousMustBeValue ||
1181        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1182                          tok::minusminus)))
1183     return addUnwrappedLine();
1184   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1185       isJSDeclOrStmt(Keywords, Next))
1186     return addUnwrappedLine();
1187 }
1188 
/// Parses one structural element starting at the current token.
///
/// The function works in two phases:
///  1. A switch on the kind of the first token dispatches constructs that are
///     recognized by their leading token (namespaces, access specifiers,
///     control statements, labels, extern blocks, imports, ...). Cases that
///     fully handle the construct return; cases that `break` fall through to
///     the second phase.
///  2. A loop that consumes tokens one construct at a time until a case
///     finishes the element and returns, or until eof() reports true.
///
/// \param IsTopLevel only consulted in the '(' case of the loop, where it
///        enables detection of K&R C parameter declarations.
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
  // TableGen include: `include "file"` forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Every token between the asm braces is marked Finalized; the closing
      // brace ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are modifiers within a declaration; in
    // the remaining languages they are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; any other use of 'extern'
    // continues as a regular declaration in the loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    // 'import' is handled per language: JavaScript ES6 imports, Proto imports
    // and C++ module imports.
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        // Without a string literal this returns without adding a line.
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // Qt-style `signals:` / `slots:` sections: the keyword followed by a colon
    // forms a line of its own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume the rest of the element token by token.
  do {
    // Token preceding the one this iteration starts at; used by the enum and
    // '(' cases below.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Objective-C '@' keywords.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // `typedef NS_ENUM(...)` and the related macros are parsed like enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon terminates the element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the current line is
      // finished.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol here so that it is not dispatched on by
      // the next loop iteration.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // After '^': an optional identifier or simple type specifier, an
      // optional parenthesized list, and an optional braced body parsed as a
      // child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then rewind the token stream.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // 'interface' in any other language is parsed like a struct.
      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // The checks below only apply when the identifier was the first token
      // of the line, optionally preceded by a single comment token.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is at least five
        // characters long or function-like, and is followed by a newline and
        // a token that can plausibly start a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // A fat arrow ("=>") in JavaScript/C# may introduce a child block.
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // In Proto, `= <` opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1673 
/// Tries to parse a C# property accessor block starting at the current '{'.
///
/// Returns false without consuming anything if the language is not C#, if the
/// token before the '{' is not an identifier, or if a lookahead over the
/// leading access specifiers / `get` / `set` / ';' tokens finds neither `get`
/// nor `set`. Otherwise the accessor is consumed and true is returned.
///
/// Precondition: FormatTok is a '{' (asserted).
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  // NOTE(review): Previous is dereferenced unchecked; presumably a '{' reaching
  // this point always has a preceding token - confirm against callers.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
  // Track these as they do not require line breaks to be introduced.
  bool HasGetOrSet = false;
  bool IsTrivialPropertyAccessor = true;
  // Lookahead only: Tok is advanced here while FormatTok stays on the '{'.
  // The loop ends via the break below, at the first token that is not an
  // access specifier, `get`, `set` or ';'.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
        HasGetOrSet = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    // Anything other than the closing '}' means the accessor has a body or
    // other content, i.e. it is not trivial.
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  // Not a property accessor: rewind the token stream and report failure.
  if (!HasGetOrSet) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor list. A following '=' (a property initializer) is
      // consumed up to and including its ';' as part of the same line.
      nextToken();
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Body of a get/set accessor, parsed one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      // Expression-bodied accessor (`=> expr;`): consume through the ';' and
      // emit it as its own line, one level deeper.
      if (FormatTok->is(TT_FatArrow)) {
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}
1763 
1764 bool UnwrappedLineParser::tryToParseLambda() {
1765   if (!Style.isCpp()) {
1766     nextToken();
1767     return false;
1768   }
1769   assert(FormatTok->is(tok::l_square));
1770   FormatToken &LSquare = *FormatTok;
1771   if (!tryToParseLambdaIntroducer())
1772     return false;
1773 
1774   bool SeenArrow = false;
1775 
1776   while (FormatTok->isNot(tok::l_brace)) {
1777     if (FormatTok->isSimpleTypeSpecifier()) {
1778       nextToken();
1779       continue;
1780     }
1781     switch (FormatTok->Tok.getKind()) {
1782     case tok::l_brace:
1783       break;
1784     case tok::l_paren:
1785       parseParens();
1786       break;
1787     case tok::amp:
1788     case tok::star:
1789     case tok::kw_const:
1790     case tok::comma:
1791     case tok::less:
1792     case tok::greater:
1793     case tok::identifier:
1794     case tok::numeric_constant:
1795     case tok::coloncolon:
1796     case tok::kw_class:
1797     case tok::kw_mutable:
1798     case tok::kw_noexcept:
1799     case tok::kw_template:
1800     case tok::kw_typename:
1801       nextToken();
1802       break;
1803     // Specialization of a template with an integer parameter can contain
1804     // arithmetic, logical, comparison and ternary operators.
1805     //
1806     // FIXME: This also accepts sequences of operators that are not in the scope
1807     // of a template argument list.
1808     //
1809     // In a C++ lambda a template type can only occur after an arrow. We use
1810     // this as an heuristic to distinguish between Objective-C expressions
1811     // followed by an `a->b` expression, such as:
1812     // ([obj func:arg] + a->b)
1813     // Otherwise the code below would parse as a lambda.
1814     //
1815     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1816     // explicit template lists: []<bool b = true && false>(U &&u){}
1817     case tok::plus:
1818     case tok::minus:
1819     case tok::exclaim:
1820     case tok::tilde:
1821     case tok::slash:
1822     case tok::percent:
1823     case tok::lessless:
1824     case tok::pipe:
1825     case tok::pipepipe:
1826     case tok::ampamp:
1827     case tok::caret:
1828     case tok::equalequal:
1829     case tok::exclaimequal:
1830     case tok::greaterequal:
1831     case tok::lessequal:
1832     case tok::question:
1833     case tok::colon:
1834     case tok::ellipsis:
1835     case tok::kw_true:
1836     case tok::kw_false:
1837       if (SeenArrow) {
1838         nextToken();
1839         break;
1840       }
1841       return true;
1842     case tok::arrow:
1843       // This might or might not actually be a lambda arrow (this could be an
1844       // ObjC method invocation followed by a dereferencing arrow). We might
1845       // reset this back to TT_Unknown in TokenAnnotator.
1846       FormatTok->setType(TT_LambdaArrow);
1847       SeenArrow = true;
1848       nextToken();
1849       break;
1850     default:
1851       return true;
1852     }
1853   }
1854   FormatTok->setType(TT_LambdaLBrace);
1855   LSquare.setType(TT_LambdaLSquare);
1856   parseChildBlock();
1857   return true;
1858 }
1859 
1860 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1861   const FormatToken *Previous = FormatTok->Previous;
1862   if (Previous &&
1863       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1864                          tok::kw_delete, tok::l_square) ||
1865        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1866        Previous->isSimpleTypeSpecifier())) {
1867     nextToken();
1868     return false;
1869   }
1870   nextToken();
1871   if (FormatTok->is(tok::l_square)) {
1872     return false;
1873   }
1874   parseSquare(/*LambdaIntroducer=*/true);
1875   return true;
1876 }
1877 
1878 void UnwrappedLineParser::tryToParseJSFunction() {
1879   assert(FormatTok->is(Keywords.kw_function) ||
1880          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1881   if (FormatTok->is(Keywords.kw_async))
1882     nextToken();
1883   // Consume "function".
1884   nextToken();
1885 
1886   // Consume * (generator function). Treat it like C++'s overloaded operators.
1887   if (FormatTok->is(tok::star)) {
1888     FormatTok->setType(TT_OverloadedOperator);
1889     nextToken();
1890   }
1891 
1892   // Consume function name.
1893   if (FormatTok->is(tok::identifier))
1894     nextToken();
1895 
1896   if (FormatTok->isNot(tok::l_paren))
1897     return;
1898 
1899   // Parse formal parameter list.
1900   parseParens();
1901 
1902   if (FormatTok->is(tok::colon)) {
1903     // Parse a type definition.
1904     nextToken();
1905 
1906     // Eat the type declaration. For braced inline object types, balance braces,
1907     // otherwise just parse until finding an l_brace for the function body.
1908     if (FormatTok->is(tok::l_brace))
1909       tryToParseBracedList();
1910     else
1911       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1912         nextToken();
1913   }
1914 
1915   if (FormatTok->is(tok::semi))
1916     return;
1917 
1918   parseChildBlock();
1919 }
1920 
1921 bool UnwrappedLineParser::tryToParseBracedList() {
1922   if (FormatTok->is(BK_Unknown))
1923     calculateBraceTypes();
1924   assert(FormatTok->isNot(BK_Unknown));
1925   if (FormatTok->is(BK_Block))
1926     return false;
1927   nextToken();
1928   parseBracedList();
1929   return true;
1930 }
1931 
1932 bool UnwrappedLineParser::tryToParseChildBlock() {
1933   assert(Style.isJavaScript() || Style.isCSharp());
1934   assert(FormatTok->is(TT_FatArrow));
1935   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
1936   // They always start an expression or a child block if followed by a curly
1937   // brace.
1938   nextToken();
1939   if (FormatTok->isNot(tok::l_brace))
1940     return false;
1941   parseChildBlock();
1942   return true;
1943 }
1944 
1945 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1946                                           bool IsEnum,
1947                                           tok::TokenKind ClosingBraceKind) {
1948   bool HasError = false;
1949 
1950   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1951   // replace this by using parseAssignmentExpression() inside.
1952   do {
1953     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
1954         tryToParseChildBlock())
1955       continue;
1956     if (Style.isJavaScript()) {
1957       if (FormatTok->is(Keywords.kw_function) ||
1958           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1959         tryToParseJSFunction();
1960         continue;
1961       }
1962       if (FormatTok->is(tok::l_brace)) {
1963         // Could be a method inside of a braced list `{a() { return 1; }}`.
1964         if (tryToParseBracedList())
1965           continue;
1966         parseChildBlock();
1967       }
1968     }
1969     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1970       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1971         addUnwrappedLine();
1972       nextToken();
1973       return !HasError;
1974     }
1975     switch (FormatTok->Tok.getKind()) {
1976     case tok::l_square:
1977       if (Style.isCSharp())
1978         parseSquare();
1979       else
1980         tryToParseLambda();
1981       break;
1982     case tok::l_paren:
1983       parseParens();
1984       // JavaScript can just have free standing methods and getters/setters in
1985       // object literals. Detect them by a "{" following ")".
1986       if (Style.isJavaScript()) {
1987         if (FormatTok->is(tok::l_brace))
1988           parseChildBlock();
1989         break;
1990       }
1991       break;
1992     case tok::l_brace:
1993       // Assume there are no blocks inside a braced init list apart
1994       // from the ones we explicitly parse out (like lambdas).
1995       FormatTok->setBlockKind(BK_BracedInit);
1996       nextToken();
1997       parseBracedList();
1998       break;
1999     case tok::less:
2000       if (Style.Language == FormatStyle::LK_Proto) {
2001         nextToken();
2002         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2003                         /*ClosingBraceKind=*/tok::greater);
2004       } else {
2005         nextToken();
2006       }
2007       break;
2008     case tok::semi:
2009       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2010       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2011       // used for error recovery if we have otherwise determined that this is
2012       // a braced list.
2013       if (Style.isJavaScript()) {
2014         nextToken();
2015         break;
2016       }
2017       HasError = true;
2018       if (!ContinueOnSemicolons)
2019         return !HasError;
2020       nextToken();
2021       break;
2022     case tok::comma:
2023       nextToken();
2024       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2025         addUnwrappedLine();
2026       break;
2027     default:
2028       nextToken();
2029       break;
2030     }
2031   } while (!eof());
2032   return false;
2033 }
2034 
2035 void UnwrappedLineParser::parseParens() {
2036   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2037   nextToken();
2038   do {
2039     switch (FormatTok->Tok.getKind()) {
2040     case tok::l_paren:
2041       parseParens();
2042       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2043         parseChildBlock();
2044       break;
2045     case tok::r_paren:
2046       nextToken();
2047       return;
2048     case tok::r_brace:
2049       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2050       return;
2051     case tok::l_square:
2052       tryToParseLambda();
2053       break;
2054     case tok::l_brace:
2055       if (!tryToParseBracedList())
2056         parseChildBlock();
2057       break;
2058     case tok::at:
2059       nextToken();
2060       if (FormatTok->Tok.is(tok::l_brace)) {
2061         nextToken();
2062         parseBracedList();
2063       }
2064       break;
2065     case tok::equal:
2066       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2067         tryToParseChildBlock();
2068       else
2069         nextToken();
2070       break;
2071     case tok::kw_class:
2072       if (Style.isJavaScript())
2073         parseRecord(/*ParseAsExpr=*/true);
2074       else
2075         nextToken();
2076       break;
2077     case tok::identifier:
2078       if (Style.isJavaScript() &&
2079           (FormatTok->is(Keywords.kw_function) ||
2080            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2081         tryToParseJSFunction();
2082       else
2083         nextToken();
2084       break;
2085     default:
2086       nextToken();
2087       break;
2088     }
2089   } while (!eof());
2090 }
2091 
2092 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2093   if (!LambdaIntroducer) {
2094     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2095     if (tryToParseLambda())
2096       return;
2097   }
2098   do {
2099     switch (FormatTok->Tok.getKind()) {
2100     case tok::l_paren:
2101       parseParens();
2102       break;
2103     case tok::r_square:
2104       nextToken();
2105       return;
2106     case tok::r_brace:
2107       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2108       return;
2109     case tok::l_square:
2110       parseSquare();
2111       break;
2112     case tok::l_brace: {
2113       if (!tryToParseBracedList())
2114         parseChildBlock();
2115       break;
2116     }
2117     case tok::at:
2118       nextToken();
2119       if (FormatTok->Tok.is(tok::l_brace)) {
2120         nextToken();
2121         parseBracedList();
2122       }
2123       break;
2124     default:
2125       nextToken();
2126       break;
2127     }
2128   } while (!eof());
2129 }
2130 
2131 void UnwrappedLineParser::parseIfThenElse() {
2132   auto HandleAttributes = [this]() {
2133     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2134     if (FormatTok->is(TT_AttributeMacro))
2135       nextToken();
2136     // Handle [[likely]] / [[unlikely]] attributes.
2137     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2138       parseSquare();
2139   };
2140 
2141   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2142   nextToken();
2143   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2144     nextToken();
2145   if (FormatTok->Tok.is(tok::l_paren))
2146     parseParens();
2147   HandleAttributes();
2148   bool NeedsUnwrappedLine = false;
2149   if (FormatTok->Tok.is(tok::l_brace)) {
2150     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2151     parseBlock();
2152     if (Style.BraceWrapping.BeforeElse)
2153       addUnwrappedLine();
2154     else
2155       NeedsUnwrappedLine = true;
2156   } else {
2157     addUnwrappedLine();
2158     ++Line->Level;
2159     parseStructuralElement();
2160     --Line->Level;
2161   }
2162   if (FormatTok->Tok.is(tok::kw_else)) {
2163     nextToken();
2164     HandleAttributes();
2165     if (FormatTok->Tok.is(tok::l_brace)) {
2166       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2167       parseBlock();
2168       addUnwrappedLine();
2169     } else if (FormatTok->Tok.is(tok::kw_if)) {
2170       FormatToken *Previous = Tokens->getPreviousToken();
2171       bool PrecededByComment = Previous && Previous->is(tok::comment);
2172       if (PrecededByComment) {
2173         addUnwrappedLine();
2174         ++Line->Level;
2175       }
2176       parseIfThenElse();
2177       if (PrecededByComment)
2178         --Line->Level;
2179     } else {
2180       addUnwrappedLine();
2181       ++Line->Level;
2182       parseStructuralElement();
2183       if (FormatTok->is(tok::eof))
2184         addUnwrappedLine();
2185       --Line->Level;
2186     }
2187   } else if (NeedsUnwrappedLine) {
2188     addUnwrappedLine();
2189   }
2190 }
2191 
2192 void UnwrappedLineParser::parseTryCatch() {
2193   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2194   nextToken();
2195   bool NeedsUnwrappedLine = false;
2196   if (FormatTok->is(tok::colon)) {
2197     // We are in a function try block, what comes is an initializer list.
2198     nextToken();
2199 
2200     // In case identifiers were removed by clang-tidy, what might follow is
2201     // multiple commas in sequence - before the first identifier.
2202     while (FormatTok->is(tok::comma))
2203       nextToken();
2204 
2205     while (FormatTok->is(tok::identifier)) {
2206       nextToken();
2207       if (FormatTok->is(tok::l_paren))
2208         parseParens();
2209       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2210           FormatTok->is(tok::l_brace)) {
2211         do {
2212           nextToken();
2213         } while (!FormatTok->is(tok::r_brace));
2214         nextToken();
2215       }
2216 
2217       // In case identifiers were removed by clang-tidy, what might follow is
2218       // multiple commas in sequence - after the first identifier.
2219       while (FormatTok->is(tok::comma))
2220         nextToken();
2221     }
2222   }
2223   // Parse try with resource.
2224   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2225     parseParens();
2226   }
2227   if (FormatTok->is(tok::l_brace)) {
2228     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2229     parseBlock();
2230     if (Style.BraceWrapping.BeforeCatch) {
2231       addUnwrappedLine();
2232     } else {
2233       NeedsUnwrappedLine = true;
2234     }
2235   } else if (!FormatTok->is(tok::kw_catch)) {
2236     // The C++ standard requires a compound-statement after a try.
2237     // If there's none, we try to assume there's a structuralElement
2238     // and try to continue.
2239     addUnwrappedLine();
2240     ++Line->Level;
2241     parseStructuralElement();
2242     --Line->Level;
2243   }
2244   while (1) {
2245     if (FormatTok->is(tok::at))
2246       nextToken();
2247     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2248                              tok::kw___finally) ||
2249           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2250            FormatTok->is(Keywords.kw_finally)) ||
2251           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2252            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2253       break;
2254     nextToken();
2255     while (FormatTok->isNot(tok::l_brace)) {
2256       if (FormatTok->is(tok::l_paren)) {
2257         parseParens();
2258         continue;
2259       }
2260       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2261         return;
2262       nextToken();
2263     }
2264     NeedsUnwrappedLine = false;
2265     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2266     parseBlock();
2267     if (Style.BraceWrapping.BeforeCatch)
2268       addUnwrappedLine();
2269     else
2270       NeedsUnwrappedLine = true;
2271   }
2272   if (NeedsUnwrappedLine)
2273     addUnwrappedLine();
2274 }
2275 
2276 void UnwrappedLineParser::parseNamespace() {
2277   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2278          "'namespace' expected");
2279 
2280   const FormatToken &InitialToken = *FormatTok;
2281   nextToken();
2282   if (InitialToken.is(TT_NamespaceMacro)) {
2283     parseParens();
2284   } else {
2285     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2286                               tok::l_square, tok::period)) {
2287       if (FormatTok->is(tok::l_square))
2288         parseSquare();
2289       else
2290         nextToken();
2291     }
2292   }
2293   if (FormatTok->Tok.is(tok::l_brace)) {
2294     if (ShouldBreakBeforeBrace(Style, InitialToken))
2295       addUnwrappedLine();
2296 
2297     unsigned AddLevels =
2298         Style.NamespaceIndentation == FormatStyle::NI_All ||
2299                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2300                  DeclarationScopeStack.size() > 1)
2301             ? 1u
2302             : 0u;
2303     bool ManageWhitesmithsBraces =
2304         AddLevels == 0u &&
2305         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2306 
2307     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2308     // the whole block.
2309     if (ManageWhitesmithsBraces)
2310       ++Line->Level;
2311 
2312     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2313                /*MunchSemi=*/true,
2314                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2315 
2316     // Munch the semicolon after a namespace. This is more common than one would
2317     // think. Putting the semicolon into its own line is very ugly.
2318     if (FormatTok->Tok.is(tok::semi))
2319       nextToken();
2320 
2321     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2322 
2323     if (ManageWhitesmithsBraces)
2324       --Line->Level;
2325   }
2326   // FIXME: Add error handling.
2327 }
2328 
2329 void UnwrappedLineParser::parseNew() {
2330   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2331   nextToken();
2332 
2333   if (Style.isCSharp()) {
2334     do {
2335       if (FormatTok->is(tok::l_brace))
2336         parseBracedList();
2337 
2338       if (FormatTok->isOneOf(tok::semi, tok::comma))
2339         return;
2340 
2341       nextToken();
2342     } while (!eof());
2343   }
2344 
2345   if (Style.Language != FormatStyle::LK_Java)
2346     return;
2347 
2348   // In Java, we can parse everything up to the parens, which aren't optional.
2349   do {
2350     // There should not be a ;, { or } before the new's open paren.
2351     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2352       return;
2353 
2354     // Consume the parens.
2355     if (FormatTok->is(tok::l_paren)) {
2356       parseParens();
2357 
2358       // If there is a class body of an anonymous class, consume that as child.
2359       if (FormatTok->is(tok::l_brace))
2360         parseChildBlock();
2361       return;
2362     }
2363     nextToken();
2364   } while (!eof());
2365 }
2366 
2367 void UnwrappedLineParser::parseForOrWhileLoop() {
2368   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2369          "'for', 'while' or foreach macro expected");
2370   nextToken();
2371   // JS' for await ( ...
2372   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2373     nextToken();
2374   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2375     nextToken();
2376   if (FormatTok->Tok.is(tok::l_paren))
2377     parseParens();
2378   if (FormatTok->Tok.is(tok::l_brace)) {
2379     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2380     parseBlock();
2381     addUnwrappedLine();
2382   } else {
2383     addUnwrappedLine();
2384     ++Line->Level;
2385     parseStructuralElement();
2386     --Line->Level;
2387   }
2388 }
2389 
2390 void UnwrappedLineParser::parseDoWhile() {
2391   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2392   nextToken();
2393   if (FormatTok->Tok.is(tok::l_brace)) {
2394     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2395     parseBlock();
2396     if (Style.BraceWrapping.BeforeWhile)
2397       addUnwrappedLine();
2398   } else {
2399     addUnwrappedLine();
2400     ++Line->Level;
2401     parseStructuralElement();
2402     --Line->Level;
2403   }
2404 
2405   // FIXME: Add error handling.
2406   if (!FormatTok->Tok.is(tok::kw_while)) {
2407     addUnwrappedLine();
2408     return;
2409   }
2410 
2411   // If in Whitesmiths mode, the line with the while() needs to be indented
2412   // to the same level as the block.
2413   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2414     ++Line->Level;
2415 
2416   nextToken();
2417   parseStructuralElement();
2418 }
2419 
2420 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2421   nextToken();
2422   unsigned OldLineLevel = Line->Level;
2423   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2424     --Line->Level;
2425   if (LeftAlignLabel)
2426     Line->Level = 0;
2427 
2428   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2429       FormatTok->Tok.is(tok::l_brace)) {
2430 
2431     CompoundStatementIndenter Indenter(this, Line->Level,
2432                                        Style.BraceWrapping.AfterCaseLabel,
2433                                        Style.BraceWrapping.IndentBraces);
2434     parseBlock();
2435     if (FormatTok->Tok.is(tok::kw_break)) {
2436       if (Style.BraceWrapping.AfterControlStatement ==
2437           FormatStyle::BWACS_Always) {
2438         addUnwrappedLine();
2439         if (!Style.IndentCaseBlocks &&
2440             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2441           Line->Level++;
2442         }
2443       }
2444       parseStructuralElement();
2445     }
2446     addUnwrappedLine();
2447   } else {
2448     if (FormatTok->is(tok::semi))
2449       nextToken();
2450     addUnwrappedLine();
2451   }
2452   Line->Level = OldLineLevel;
2453   if (FormatTok->isNot(tok::l_brace)) {
2454     parseStructuralElement();
2455     addUnwrappedLine();
2456   }
2457 }
2458 
2459 void UnwrappedLineParser::parseCaseLabel() {
2460   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2461 
2462   // FIXME: fix handling of complex expressions here.
2463   do {
2464     nextToken();
2465   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2466   parseLabel();
2467 }
2468 
2469 void UnwrappedLineParser::parseSwitch() {
2470   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2471   nextToken();
2472   if (FormatTok->Tok.is(tok::l_paren))
2473     parseParens();
2474   if (FormatTok->Tok.is(tok::l_brace)) {
2475     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2476     parseBlock();
2477     addUnwrappedLine();
2478   } else {
2479     addUnwrappedLine();
2480     ++Line->Level;
2481     parseStructuralElement();
2482     --Line->Level;
2483   }
2484 }
2485 
2486 void UnwrappedLineParser::parseAccessSpecifier() {
2487   nextToken();
2488   // Understand Qt's slots.
2489   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2490     nextToken();
2491   // Otherwise, we don't know what it is, and we'd better keep the next token.
2492   if (FormatTok->Tok.is(tok::colon))
2493     nextToken();
2494   addUnwrappedLine();
2495 }
2496 
2497 void UnwrappedLineParser::parseConcept() {
2498   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2499   nextToken();
2500   if (!FormatTok->Tok.is(tok::identifier))
2501     return;
2502   nextToken();
2503   if (!FormatTok->Tok.is(tok::equal))
2504     return;
2505   nextToken();
2506   if (FormatTok->Tok.is(tok::kw_requires)) {
2507     nextToken();
2508     parseRequiresExpression(Line->Level);
2509   } else {
2510     parseConstraintExpression(Line->Level);
2511   }
2512 }
2513 
2514 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2515   // requires (R range)
2516   if (FormatTok->Tok.is(tok::l_paren)) {
2517     parseParens();
2518     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2519       addUnwrappedLine();
2520       --Line->Level;
2521     }
2522   }
2523 
2524   if (FormatTok->Tok.is(tok::l_brace)) {
2525     if (Style.BraceWrapping.AfterFunction)
2526       addUnwrappedLine();
2527     FormatTok->setType(TT_FunctionLBrace);
2528     parseBlock();
2529     addUnwrappedLine();
2530   } else {
2531     parseConstraintExpression(OriginalLevel);
2532   }
2533 }
2534 
2535 void UnwrappedLineParser::parseConstraintExpression(
2536     unsigned int OriginalLevel) {
2537   // requires Id<T> && Id<T> || Id<T>
2538   while (
2539       FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2540     nextToken();
2541     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2542                               tok::greater, tok::comma, tok::ellipsis)) {
2543       if (FormatTok->Tok.is(tok::less)) {
2544         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2545                         /*ClosingBraceKind=*/tok::greater);
2546         continue;
2547       }
2548       nextToken();
2549     }
2550     if (FormatTok->Tok.is(tok::kw_requires)) {
2551       parseRequiresExpression(OriginalLevel);
2552     }
2553     if (FormatTok->Tok.is(tok::less)) {
2554       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2555                       /*ClosingBraceKind=*/tok::greater);
2556     }
2557 
2558     if (FormatTok->Tok.is(tok::l_paren)) {
2559       parseParens();
2560     }
2561     if (FormatTok->Tok.is(tok::l_brace)) {
2562       if (Style.BraceWrapping.AfterFunction)
2563         addUnwrappedLine();
2564       FormatTok->setType(TT_FunctionLBrace);
2565       parseBlock();
2566     }
2567     if (FormatTok->Tok.is(tok::semi)) {
2568       // Eat any trailing semi.
2569       nextToken();
2570       addUnwrappedLine();
2571     }
2572     if (FormatTok->Tok.is(tok::colon)) {
2573       return;
2574     }
2575     if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2576       if (FormatTok->Previous &&
2577           !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2578                                         tok::coloncolon)) {
2579         addUnwrappedLine();
2580       }
2581       if (Style.IndentRequires && OriginalLevel != Line->Level) {
2582         --Line->Level;
2583       }
2584       break;
2585     } else {
2586       FormatTok->setType(TT_ConstraintJunctions);
2587     }
2588 
2589     nextToken();
2590   }
2591 }
2592 
2593 void UnwrappedLineParser::parseRequires() {
2594   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2595 
2596   unsigned OriginalLevel = Line->Level;
2597   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2598     addUnwrappedLine();
2599     if (Style.IndentRequires) {
2600       Line->Level++;
2601     }
2602   }
2603   nextToken();
2604 
2605   parseRequiresExpression(OriginalLevel);
2606 }
2607 
2608 bool UnwrappedLineParser::parseEnum() {
2609   // Won't be 'enum' for NS_ENUMs.
2610   if (FormatTok->Tok.is(tok::kw_enum))
2611     nextToken();
2612 
2613   const FormatToken &InitialToken = *FormatTok;
2614 
2615   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2616   // declarations. An "enum" keyword followed by a colon would be a syntax
2617   // error and thus assume it is just an identifier.
2618   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2619     return false;
2620 
2621   // In protobuf, "enum" can be used as a field name.
2622   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2623     return false;
2624 
2625   // Eat up enum class ...
2626   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2627     nextToken();
2628 
2629   while (FormatTok->Tok.getIdentifierInfo() ||
2630          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2631                             tok::greater, tok::comma, tok::question)) {
2632     nextToken();
2633     // We can have macros or attributes in between 'enum' and the enum name.
2634     if (FormatTok->is(tok::l_paren))
2635       parseParens();
2636     if (FormatTok->is(tok::identifier)) {
2637       nextToken();
2638       // If there are two identifiers in a row, this is likely an elaborate
2639       // return type. In Java, this can be "implements", etc.
2640       if (Style.isCpp() && FormatTok->is(tok::identifier))
2641         return false;
2642     }
2643   }
2644 
2645   // Just a declaration or something is wrong.
2646   if (FormatTok->isNot(tok::l_brace))
2647     return true;
2648   FormatTok->setBlockKind(BK_Block);
2649 
2650   if (Style.Language == FormatStyle::LK_Java) {
2651     // Java enums are different.
2652     parseJavaEnumBody();
2653     return true;
2654   }
2655   if (Style.Language == FormatStyle::LK_Proto) {
2656     parseBlock(/*MustBeDeclaration=*/true);
2657     return true;
2658   }
2659 
2660   if (!Style.AllowShortEnumsOnASingleLine &&
2661       ShouldBreakBeforeBrace(Style, InitialToken))
2662     addUnwrappedLine();
2663   // Parse enum body.
2664   nextToken();
2665   if (!Style.AllowShortEnumsOnASingleLine) {
2666     addUnwrappedLine();
2667     Line->Level += 1;
2668   }
2669   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2670                                    /*IsEnum=*/true);
2671   if (!Style.AllowShortEnumsOnASingleLine)
2672     Line->Level -= 1;
2673   if (HasError) {
2674     if (FormatTok->is(tok::semi))
2675       nextToken();
2676     addUnwrappedLine();
2677   }
2678   return true;
2679 
2680   // There is no addUnwrappedLine() here so that we fall through to parsing a
2681   // structural element afterwards. Thus, in "enum A {} n, m;",
2682   // "} n, m;" will end up in one unwrapped line.
2683 }
2684 
2685 bool UnwrappedLineParser::parseStructLike() {
2686   // parseRecord falls through and does not yet add an unwrapped line as a
2687   // record declaration or definition can start a structural element.
2688   parseRecord();
2689   // This does not apply to Java, JavaScript and C#.
2690   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2691       Style.isCSharp()) {
2692     if (FormatTok->is(tok::semi))
2693       nextToken();
2694     addUnwrappedLine();
2695     return true;
2696   }
2697   return false;
2698 }
2699 
2700 namespace {
2701 // A class used to set and restore the Token position when peeking
2702 // ahead in the token source.
2703 class ScopedTokenPosition {
2704   unsigned StoredPosition;
2705   FormatTokenSource *Tokens;
2706 
2707 public:
2708   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2709     assert(Tokens && "Tokens expected to not be null");
2710     StoredPosition = Tokens->getPosition();
2711   }
2712 
2713   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2714 };
2715 } // namespace
2716 
2717 // Look to see if we have [[ by looking ahead, if
2718 // its not then rewind to the original position.
2719 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2720   ScopedTokenPosition AutoPosition(Tokens);
2721   FormatToken *Tok = Tokens->getNextToken();
2722   // We already read the first [ check for the second.
2723   if (!Tok->is(tok::l_square)) {
2724     return false;
2725   }
2726   // Double check that the attribute is just something
2727   // fairly simple.
2728   while (Tok->isNot(tok::eof)) {
2729     if (Tok->is(tok::r_square)) {
2730       break;
2731     }
2732     Tok = Tokens->getNextToken();
2733   }
2734   if (Tok->is(tok::eof))
2735     return false;
2736   Tok = Tokens->getNextToken();
2737   if (!Tok->is(tok::r_square)) {
2738     return false;
2739   }
2740   Tok = Tokens->getNextToken();
2741   if (Tok->is(tok::semi)) {
2742     return false;
2743   }
2744   return true;
2745 }
2746 
2747 void UnwrappedLineParser::parseJavaEnumBody() {
2748   // Determine whether the enum is simple, i.e. does not have a semicolon or
2749   // constants with class bodies. Simple enums can be formatted like braced
2750   // lists, contracted to a single line, etc.
2751   unsigned StoredPosition = Tokens->getPosition();
2752   bool IsSimple = true;
2753   FormatToken *Tok = Tokens->getNextToken();
2754   while (!Tok->is(tok::eof)) {
2755     if (Tok->is(tok::r_brace))
2756       break;
2757     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2758       IsSimple = false;
2759       break;
2760     }
2761     // FIXME: This will also mark enums with braces in the arguments to enum
2762     // constants as "not simple". This is probably fine in practice, though.
2763     Tok = Tokens->getNextToken();
2764   }
2765   FormatTok = Tokens->setPosition(StoredPosition);
2766 
2767   if (IsSimple) {
2768     nextToken();
2769     parseBracedList();
2770     addUnwrappedLine();
2771     return;
2772   }
2773 
2774   // Parse the body of a more complex enum.
2775   // First add a line for everything up to the "{".
2776   nextToken();
2777   addUnwrappedLine();
2778   ++Line->Level;
2779 
2780   // Parse the enum constants.
2781   while (FormatTok) {
2782     if (FormatTok->is(tok::l_brace)) {
2783       // Parse the constant's class body.
2784       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2785                  /*MunchSemi=*/false);
2786     } else if (FormatTok->is(tok::l_paren)) {
2787       parseParens();
2788     } else if (FormatTok->is(tok::comma)) {
2789       nextToken();
2790       addUnwrappedLine();
2791     } else if (FormatTok->is(tok::semi)) {
2792       nextToken();
2793       addUnwrappedLine();
2794       break;
2795     } else if (FormatTok->is(tok::r_brace)) {
2796       addUnwrappedLine();
2797       break;
2798     } else {
2799       nextToken();
2800     }
2801   }
2802 
2803   // Parse the class body after the enum's ";" if any.
2804   parseLevel(/*HasOpeningBrace=*/true);
2805   nextToken();
2806   --Line->Level;
2807   addUnwrappedLine();
2808 }
2809 
2810 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2811   const FormatToken &InitialToken = *FormatTok;
2812   nextToken();
2813 
2814   // The actual identifier can be a nested name specifier, and in macros
2815   // it is often token-pasted.
2816   // An [[attribute]] can be before the identifier.
2817   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2818                             tok::kw___attribute, tok::kw___declspec,
2819                             tok::kw_alignas, tok::l_square, tok::r_square) ||
2820          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2821           FormatTok->isOneOf(tok::period, tok::comma))) {
2822     if (Style.isJavaScript() &&
2823         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2824       // JavaScript/TypeScript supports inline object types in
2825       // extends/implements positions:
2826       //     class Foo implements {bar: number} { }
2827       nextToken();
2828       if (FormatTok->is(tok::l_brace)) {
2829         tryToParseBracedList();
2830         continue;
2831       }
2832     }
2833     bool IsNonMacroIdentifier =
2834         FormatTok->is(tok::identifier) &&
2835         FormatTok->TokenText != FormatTok->TokenText.upper();
2836     nextToken();
2837     // We can have macros or attributes in between 'class' and the class name.
2838     if (!IsNonMacroIdentifier) {
2839       if (FormatTok->Tok.is(tok::l_paren)) {
2840         parseParens();
2841       } else if (FormatTok->is(TT_AttributeSquare)) {
2842         parseSquare();
2843         // Consume the closing TT_AttributeSquare.
2844         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2845           nextToken();
2846       }
2847     }
2848   }
2849 
2850   // Note that parsing away template declarations here leads to incorrectly
2851   // accepting function declarations as record declarations.
2852   // In general, we cannot solve this problem. Consider:
2853   // class A<int> B() {}
2854   // which can be a function definition or a class definition when B() is a
2855   // macro. If we find enough real-world cases where this is a problem, we
2856   // can parse for the 'template' keyword in the beginning of the statement,
2857   // and thus rule out the record production in case there is no template
2858   // (this would still leave us with an ambiguity between template function
2859   // and class declarations).
2860   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2861     while (!eof()) {
2862       if (FormatTok->is(tok::l_brace)) {
2863         calculateBraceTypes(/*ExpectClassBody=*/true);
2864         if (!tryToParseBracedList())
2865           break;
2866       }
2867       if (FormatTok->Tok.is(tok::semi))
2868         return;
2869       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2870         addUnwrappedLine();
2871         nextToken();
2872         parseCSharpGenericTypeConstraint();
2873         break;
2874       }
2875       nextToken();
2876     }
2877   }
2878   if (FormatTok->Tok.is(tok::l_brace)) {
2879     if (ParseAsExpr) {
2880       parseChildBlock();
2881     } else {
2882       if (ShouldBreakBeforeBrace(Style, InitialToken))
2883         addUnwrappedLine();
2884 
2885       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
2886       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
2887     }
2888   }
2889   // There is no addUnwrappedLine() here so that we fall through to parsing a
2890   // structural element afterwards. Thus, in "class A {} n, m;",
2891   // "} n, m;" will end up in one unwrapped line.
2892 }
2893 
2894 void UnwrappedLineParser::parseObjCMethod() {
2895   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2896          "'(' or identifier expected.");
2897   do {
2898     if (FormatTok->Tok.is(tok::semi)) {
2899       nextToken();
2900       addUnwrappedLine();
2901       return;
2902     } else if (FormatTok->Tok.is(tok::l_brace)) {
2903       if (Style.BraceWrapping.AfterFunction)
2904         addUnwrappedLine();
2905       parseBlock();
2906       addUnwrappedLine();
2907       return;
2908     } else {
2909       nextToken();
2910     }
2911   } while (!eof());
2912 }
2913 
2914 void UnwrappedLineParser::parseObjCProtocolList() {
2915   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2916   do {
2917     nextToken();
2918     // Early exit in case someone forgot a close angle.
2919     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2920         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2921       return;
2922   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2923   nextToken(); // Skip '>'.
2924 }
2925 
2926 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2927   do {
2928     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2929       nextToken();
2930       addUnwrappedLine();
2931       break;
2932     }
2933     if (FormatTok->is(tok::l_brace)) {
2934       parseBlock();
2935       // In ObjC interfaces, nothing should be following the "}".
2936       addUnwrappedLine();
2937     } else if (FormatTok->is(tok::r_brace)) {
2938       // Ignore stray "}". parseStructuralElement doesn't consume them.
2939       nextToken();
2940       addUnwrappedLine();
2941     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2942       nextToken();
2943       parseObjCMethod();
2944     } else {
2945       parseStructuralElement();
2946     }
2947   } while (!eof());
2948 }
2949 
2950 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2951   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2952          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2953   nextToken();
2954   nextToken(); // interface name
2955 
2956   // @interface can be followed by a lightweight generic
2957   // specialization list, then either a base class or a category.
2958   if (FormatTok->Tok.is(tok::less)) {
2959     parseObjCLightweightGenerics();
2960   }
2961   if (FormatTok->Tok.is(tok::colon)) {
2962     nextToken();
2963     nextToken(); // base class name
2964     // The base class can also have lightweight generics applied to it.
2965     if (FormatTok->Tok.is(tok::less)) {
2966       parseObjCLightweightGenerics();
2967     }
2968   } else if (FormatTok->Tok.is(tok::l_paren))
2969     // Skip category, if present.
2970     parseParens();
2971 
2972   if (FormatTok->Tok.is(tok::less))
2973     parseObjCProtocolList();
2974 
2975   if (FormatTok->Tok.is(tok::l_brace)) {
2976     if (Style.BraceWrapping.AfterObjCDeclaration)
2977       addUnwrappedLine();
2978     parseBlock(/*MustBeDeclaration=*/true);
2979   }
2980 
2981   // With instance variables, this puts '}' on its own line.  Without instance
2982   // variables, this ends the @interface line.
2983   addUnwrappedLine();
2984 
2985   parseObjCUntilAtEnd();
2986 }
2987 
2988 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2989   assert(FormatTok->Tok.is(tok::less));
2990   // Unlike protocol lists, generic parameterizations support
2991   // nested angles:
2992   //
2993   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2994   //     NSObject <NSCopying, NSSecureCoding>
2995   //
2996   // so we need to count how many open angles we have left.
2997   unsigned NumOpenAngles = 1;
2998   do {
2999     nextToken();
3000     // Early exit in case someone forgot a close angle.
3001     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3002         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3003       break;
3004     if (FormatTok->Tok.is(tok::less))
3005       ++NumOpenAngles;
3006     else if (FormatTok->Tok.is(tok::greater)) {
3007       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3008       --NumOpenAngles;
3009     }
3010   } while (!eof() && NumOpenAngles != 0);
3011   nextToken(); // Skip '>'.
3012 }
3013 
3014 // Returns true for the declaration/definition form of @protocol,
3015 // false for the expression form.
3016 bool UnwrappedLineParser::parseObjCProtocol() {
3017   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3018   nextToken();
3019 
3020   if (FormatTok->is(tok::l_paren))
3021     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3022     return false;
3023 
3024   // The definition/declaration form,
3025   // @protocol Foo
3026   // - (int)someMethod;
3027   // @end
3028 
3029   nextToken(); // protocol name
3030 
3031   if (FormatTok->Tok.is(tok::less))
3032     parseObjCProtocolList();
3033 
3034   // Check for protocol declaration.
3035   if (FormatTok->Tok.is(tok::semi)) {
3036     nextToken();
3037     addUnwrappedLine();
3038     return true;
3039   }
3040 
3041   addUnwrappedLine();
3042   parseObjCUntilAtEnd();
3043   return true;
3044 }
3045 
3046 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3047   bool IsImport = FormatTok->is(Keywords.kw_import);
3048   assert(IsImport || FormatTok->is(tok::kw_export));
3049   nextToken();
3050 
3051   // Consume the "default" in "export default class/function".
3052   if (FormatTok->is(tok::kw_default))
3053     nextToken();
3054 
3055   // Consume "async function", "function" and "default function", so that these
3056   // get parsed as free-standing JS functions, i.e. do not require a trailing
3057   // semicolon.
3058   if (FormatTok->is(Keywords.kw_async))
3059     nextToken();
3060   if (FormatTok->is(Keywords.kw_function)) {
3061     nextToken();
3062     return;
3063   }
3064 
3065   // For imports, `export *`, `export {...}`, consume the rest of the line up
3066   // to the terminating `;`. For everything else, just return and continue
3067   // parsing the structural element, i.e. the declaration or expression for
3068   // `export default`.
3069   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3070       !FormatTok->isStringLiteral())
3071     return;
3072 
3073   while (!eof()) {
3074     if (FormatTok->is(tok::semi))
3075       return;
3076     if (Line->Tokens.empty()) {
3077       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3078       // import statement should terminate.
3079       return;
3080     }
3081     if (FormatTok->is(tok::l_brace)) {
3082       FormatTok->setBlockKind(BK_Block);
3083       nextToken();
3084       parseBracedList();
3085     } else {
3086       nextToken();
3087     }
3088   }
3089 }
3090 
3091 void UnwrappedLineParser::parseStatementMacro() {
3092   nextToken();
3093   if (FormatTok->is(tok::l_paren))
3094     parseParens();
3095   if (FormatTok->is(tok::semi))
3096     nextToken();
3097   addUnwrappedLine();
3098 }
3099 
3100 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3101                                                  StringRef Prefix = "") {
3102   llvm::dbgs() << Prefix << "Line(" << Line.Level
3103                << ", FSC=" << Line.FirstStartColumn << ")"
3104                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3105   for (const auto &Node : Line.Tokens) {
3106     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3107                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3108                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3109   }
3110   for (const auto &Node : Line.Tokens)
3111     for (const auto &ChildNode : Node.Children)
3112       printDebugInfo(ChildNode, "\nChild: ");
3113 
3114   llvm::dbgs() << "\n";
3115 }
3116 
3117 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3118   if (Line->Tokens.empty())
3119     return;
3120   LLVM_DEBUG({
3121     if (CurrentLines == &Lines)
3122       printDebugInfo(*Line);
3123   });
3124 
3125   // If this line closes a block when in Whitesmiths mode, remember that
3126   // information so that the level can be decreased after the line is added.
3127   // This has to happen after the addition of the line since the line itself
3128   // needs to be indented.
3129   bool ClosesWhitesmithsBlock =
3130       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3131       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3132 
3133   CurrentLines->push_back(std::move(*Line));
3134   Line->Tokens.clear();
3135   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3136   Line->FirstStartColumn = 0;
3137 
3138   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3139     --Line->Level;
3140   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3141     CurrentLines->append(
3142         std::make_move_iterator(PreprocessorDirectives.begin()),
3143         std::make_move_iterator(PreprocessorDirectives.end()));
3144     PreprocessorDirectives.clear();
3145   }
3146   // Disconnect the current token from the last token on the previous line.
3147   FormatTok->Previous = nullptr;
3148 }
3149 
3150 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3151 
3152 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3153   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3154          FormatTok.NewlinesBefore > 0;
3155 }
3156 
3157 // Checks if \p FormatTok is a line comment that continues the line comment
3158 // section on \p Line.
3159 static bool
3160 continuesLineCommentSection(const FormatToken &FormatTok,
3161                             const UnwrappedLine &Line,
3162                             const llvm::Regex &CommentPragmasRegex) {
3163   if (Line.Tokens.empty())
3164     return false;
3165 
3166   StringRef IndentContent = FormatTok.TokenText;
3167   if (FormatTok.TokenText.startswith("//") ||
3168       FormatTok.TokenText.startswith("/*"))
3169     IndentContent = FormatTok.TokenText.substr(2);
3170   if (CommentPragmasRegex.match(IndentContent))
3171     return false;
3172 
3173   // If Line starts with a line comment, then FormatTok continues the comment
3174   // section if its original column is greater or equal to the original start
3175   // column of the line.
3176   //
3177   // Define the min column token of a line as follows: if a line ends in '{' or
3178   // contains a '{' followed by a line comment, then the min column token is
3179   // that '{'. Otherwise, the min column token of the line is the first token of
3180   // the line.
3181   //
3182   // If Line starts with a token other than a line comment, then FormatTok
3183   // continues the comment section if its original column is greater than the
3184   // original start column of the min column token of the line.
3185   //
3186   // For example, the second line comment continues the first in these cases:
3187   //
3188   // // first line
3189   // // second line
3190   //
3191   // and:
3192   //
3193   // // first line
3194   //  // second line
3195   //
3196   // and:
3197   //
3198   // int i; // first line
3199   //  // second line
3200   //
3201   // and:
3202   //
3203   // do { // first line
3204   //      // second line
3205   //   int i;
3206   // } while (true);
3207   //
3208   // and:
3209   //
3210   // enum {
3211   //   a, // first line
3212   //    // second line
3213   //   b
3214   // };
3215   //
3216   // The second line comment doesn't continue the first in these cases:
3217   //
3218   //   // first line
3219   //  // second line
3220   //
3221   // and:
3222   //
3223   // int i; // first line
3224   // // second line
3225   //
3226   // and:
3227   //
3228   // do { // first line
3229   //   // second line
3230   //   int i;
3231   // } while (true);
3232   //
3233   // and:
3234   //
3235   // enum {
3236   //   a, // first line
3237   //   // second line
3238   // };
3239   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3240 
3241   // Scan for '{//'. If found, use the column of '{' as a min column for line
3242   // comment section continuation.
3243   const FormatToken *PreviousToken = nullptr;
3244   for (const UnwrappedLineNode &Node : Line.Tokens) {
3245     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3246         isLineComment(*Node.Tok)) {
3247       MinColumnToken = PreviousToken;
3248       break;
3249     }
3250     PreviousToken = Node.Tok;
3251 
3252     // Grab the last newline preceding a token in this unwrapped line.
3253     if (Node.Tok->NewlinesBefore > 0) {
3254       MinColumnToken = Node.Tok;
3255     }
3256   }
3257   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3258     MinColumnToken = PreviousToken;
3259   }
3260 
3261   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3262                               MinColumnToken);
3263 }
3264 
3265 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3266   bool JustComments = Line->Tokens.empty();
3267   for (SmallVectorImpl<FormatToken *>::const_iterator
3268            I = CommentsBeforeNextToken.begin(),
3269            E = CommentsBeforeNextToken.end();
3270        I != E; ++I) {
3271     // Line comments that belong to the same line comment section are put on the
3272     // same line since later we might want to reflow content between them.
3273     // Additional fine-grained breaking of line comment sections is controlled
3274     // by the class BreakableLineCommentSection in case it is desirable to keep
3275     // several line comment sections in the same unwrapped line.
3276     //
3277     // FIXME: Consider putting separate line comment sections as children to the
3278     // unwrapped line instead.
3279     (*I)->ContinuesLineCommentSection =
3280         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3281     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3282       addUnwrappedLine();
3283     pushToken(*I);
3284   }
3285   if (NewlineBeforeNext && JustComments)
3286     addUnwrappedLine();
3287   CommentsBeforeNextToken.clear();
3288 }
3289 
3290 void UnwrappedLineParser::nextToken(int LevelDifference) {
3291   if (eof())
3292     return;
3293   flushComments(isOnNewLine(*FormatTok));
3294   pushToken(FormatTok);
3295   FormatToken *Previous = FormatTok;
3296   if (!Style.isJavaScript())
3297     readToken(LevelDifference);
3298   else
3299     readTokenWithJavaScriptASI();
3300   FormatTok->Previous = Previous;
3301 }
3302 
3303 void UnwrappedLineParser::distributeComments(
3304     const SmallVectorImpl<FormatToken *> &Comments,
3305     const FormatToken *NextTok) {
3306   // Whether or not a line comment token continues a line is controlled by
3307   // the method continuesLineCommentSection, with the following caveat:
3308   //
3309   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3310   // that each comment line from the trail is aligned with the next token, if
3311   // the next token exists. If a trail exists, the beginning of the maximal
3312   // trail is marked as a start of a new comment section.
3313   //
3314   // For example in this code:
3315   //
3316   // int a; // line about a
3317   //   // line 1 about b
3318   //   // line 2 about b
3319   //   int b;
3320   //
3321   // the two lines about b form a maximal trail, so there are two sections, the
3322   // first one consisting of the single comment "// line about a" and the
3323   // second one consisting of the next two comments.
3324   if (Comments.empty())
3325     return;
3326   bool ShouldPushCommentsInCurrentLine = true;
3327   bool HasTrailAlignedWithNextToken = false;
3328   unsigned StartOfTrailAlignedWithNextToken = 0;
3329   if (NextTok) {
3330     // We are skipping the first element intentionally.
3331     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3332       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3333         HasTrailAlignedWithNextToken = true;
3334         StartOfTrailAlignedWithNextToken = i;
3335       }
3336     }
3337   }
3338   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3339     FormatToken *FormatTok = Comments[i];
3340     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3341       FormatTok->ContinuesLineCommentSection = false;
3342     } else {
3343       FormatTok->ContinuesLineCommentSection =
3344           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3345     }
3346     if (!FormatTok->ContinuesLineCommentSection &&
3347         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3348       ShouldPushCommentsInCurrentLine = false;
3349     }
3350     if (ShouldPushCommentsInCurrentLine) {
3351       pushToken(FormatTok);
3352     } else {
3353       CommentsBeforeNextToken.push_back(FormatTok);
3354     }
3355   }
3356 }
3357 
3358 void UnwrappedLineParser::readToken(int LevelDifference) {
3359   SmallVector<FormatToken *, 1> Comments;
3360   do {
3361     FormatTok = Tokens->getNextToken();
3362     assert(FormatTok);
3363     while (FormatTok->getType() == TT_ConflictStart ||
3364            FormatTok->getType() == TT_ConflictEnd ||
3365            FormatTok->getType() == TT_ConflictAlternative) {
3366       if (FormatTok->getType() == TT_ConflictStart) {
3367         conditionalCompilationStart(/*Unreachable=*/false);
3368       } else if (FormatTok->getType() == TT_ConflictAlternative) {
3369         conditionalCompilationAlternative();
3370       } else if (FormatTok->getType() == TT_ConflictEnd) {
3371         conditionalCompilationEnd();
3372       }
3373       FormatTok = Tokens->getNextToken();
3374       FormatTok->MustBreakBefore = true;
3375     }
3376 
3377     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3378            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3379       distributeComments(Comments, FormatTok);
3380       Comments.clear();
3381       // If there is an unfinished unwrapped line, we flush the preprocessor
3382       // directives only after that unwrapped line was finished later.
3383       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3384       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3385       assert((LevelDifference >= 0 ||
3386               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3387              "LevelDifference makes Line->Level negative");
3388       Line->Level += LevelDifference;
3389       // Comments stored before the preprocessor directive need to be output
3390       // before the preprocessor directive, at the same level as the
3391       // preprocessor directive, as we consider them to apply to the directive.
3392       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3393           PPBranchLevel > 0)
3394         Line->Level += PPBranchLevel;
3395       flushComments(isOnNewLine(*FormatTok));
3396       parsePPDirective();
3397     }
3398 
3399     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3400         !Line->InPPDirective) {
3401       continue;
3402     }
3403 
3404     if (!FormatTok->Tok.is(tok::comment)) {
3405       distributeComments(Comments, FormatTok);
3406       Comments.clear();
3407       return;
3408     }
3409 
3410     Comments.push_back(FormatTok);
3411   } while (!eof());
3412 
3413   distributeComments(Comments, nullptr);
3414   Comments.clear();
3415 }
3416 
3417 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3418   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3419   if (MustBreakBeforeNextToken) {
3420     Line->Tokens.back().Tok->MustBreakBefore = true;
3421     MustBreakBeforeNextToken = false;
3422   }
3423 }
3424 
3425 } // end namespace format
3426 } // end namespace clang
3427