xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/DefinitionBlockSeparator.cpp (revision 04eeddc0aa8e0a417a16eaf9d7d095207f4a8623)
//===--- DefinitionBlockSeparator.cpp ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements DefinitionBlockSeparator, a TokenAnalyzer that inserts
/// or removes empty lines between definition blocks such as classes, structs,
/// functions, enums, and namespaces.
///
//===----------------------------------------------------------------------===//
15*04eeddc0SDimitry Andric 
16*04eeddc0SDimitry Andric #include "DefinitionBlockSeparator.h"
17*04eeddc0SDimitry Andric #include "llvm/Support/Debug.h"
18*04eeddc0SDimitry Andric #define DEBUG_TYPE "definition-block-separator"
19*04eeddc0SDimitry Andric 
20*04eeddc0SDimitry Andric namespace clang {
21*04eeddc0SDimitry Andric namespace format {
22*04eeddc0SDimitry Andric std::pair<tooling::Replacements, unsigned> DefinitionBlockSeparator::analyze(
23*04eeddc0SDimitry Andric     TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
24*04eeddc0SDimitry Andric     FormatTokenLexer &Tokens) {
25*04eeddc0SDimitry Andric   assert(Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave);
26*04eeddc0SDimitry Andric   AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
27*04eeddc0SDimitry Andric   tooling::Replacements Result;
28*04eeddc0SDimitry Andric   separateBlocks(AnnotatedLines, Result, Tokens);
29*04eeddc0SDimitry Andric   return {Result, 0};
30*04eeddc0SDimitry Andric }
31*04eeddc0SDimitry Andric 
32*04eeddc0SDimitry Andric void DefinitionBlockSeparator::separateBlocks(
33*04eeddc0SDimitry Andric     SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result,
34*04eeddc0SDimitry Andric     FormatTokenLexer &Tokens) {
35*04eeddc0SDimitry Andric   const bool IsNeverStyle =
36*04eeddc0SDimitry Andric       Style.SeparateDefinitionBlocks == FormatStyle::SDS_Never;
37*04eeddc0SDimitry Andric   const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords();
38*04eeddc0SDimitry Andric   auto LikelyDefinition = [this, ExtraKeywords](const AnnotatedLine *Line,
39*04eeddc0SDimitry Andric                                                 bool ExcludeEnum = false) {
40*04eeddc0SDimitry Andric     if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) ||
41*04eeddc0SDimitry Andric         Line->startsWithNamespace())
42*04eeddc0SDimitry Andric       return true;
43*04eeddc0SDimitry Andric     FormatToken *CurrentToken = Line->First;
44*04eeddc0SDimitry Andric     while (CurrentToken) {
45*04eeddc0SDimitry Andric       if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct) ||
46*04eeddc0SDimitry Andric           (Style.isJavaScript() && CurrentToken->is(ExtraKeywords.kw_function)))
47*04eeddc0SDimitry Andric         return true;
48*04eeddc0SDimitry Andric       if (!ExcludeEnum && CurrentToken->is(tok::kw_enum))
49*04eeddc0SDimitry Andric         return true;
50*04eeddc0SDimitry Andric       CurrentToken = CurrentToken->Next;
51*04eeddc0SDimitry Andric     }
52*04eeddc0SDimitry Andric     return false;
53*04eeddc0SDimitry Andric   };
54*04eeddc0SDimitry Andric   unsigned NewlineCount =
55*04eeddc0SDimitry Andric       (Style.SeparateDefinitionBlocks == FormatStyle::SDS_Always ? 1 : 0) + 1;
56*04eeddc0SDimitry Andric   WhitespaceManager Whitespaces(
57*04eeddc0SDimitry Andric       Env.getSourceManager(), Style,
58*04eeddc0SDimitry Andric       Style.DeriveLineEnding
59*04eeddc0SDimitry Andric           ? WhitespaceManager::inputUsesCRLF(
60*04eeddc0SDimitry Andric                 Env.getSourceManager().getBufferData(Env.getFileID()),
61*04eeddc0SDimitry Andric                 Style.UseCRLF)
62*04eeddc0SDimitry Andric           : Style.UseCRLF);
63*04eeddc0SDimitry Andric   for (unsigned I = 0; I < Lines.size(); ++I) {
64*04eeddc0SDimitry Andric     const auto &CurrentLine = Lines[I];
65*04eeddc0SDimitry Andric     if (CurrentLine->InPPDirective)
66*04eeddc0SDimitry Andric       continue;
67*04eeddc0SDimitry Andric     FormatToken *TargetToken = nullptr;
68*04eeddc0SDimitry Andric     AnnotatedLine *TargetLine;
69*04eeddc0SDimitry Andric     auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex;
70*04eeddc0SDimitry Andric     AnnotatedLine *OpeningLine = nullptr;
71*04eeddc0SDimitry Andric     const auto IsAccessSpecifierToken = [](const FormatToken *Token) {
72*04eeddc0SDimitry Andric       return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier();
73*04eeddc0SDimitry Andric     };
74*04eeddc0SDimitry Andric     const auto InsertReplacement = [&](const int NewlineToInsert) {
75*04eeddc0SDimitry Andric       assert(TargetLine);
76*04eeddc0SDimitry Andric       assert(TargetToken);
77*04eeddc0SDimitry Andric 
78*04eeddc0SDimitry Andric       // Do not handle EOF newlines.
79*04eeddc0SDimitry Andric       if (TargetToken->is(tok::eof))
80*04eeddc0SDimitry Andric         return;
81*04eeddc0SDimitry Andric       if (IsAccessSpecifierToken(TargetToken) ||
82*04eeddc0SDimitry Andric           (OpeningLineIndex > 0 &&
83*04eeddc0SDimitry Andric            IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First)))
84*04eeddc0SDimitry Andric         return;
85*04eeddc0SDimitry Andric       if (!TargetLine->Affected)
86*04eeddc0SDimitry Andric         return;
87*04eeddc0SDimitry Andric       Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert,
88*04eeddc0SDimitry Andric                                     TargetToken->OriginalColumn,
89*04eeddc0SDimitry Andric                                     TargetToken->OriginalColumn);
90*04eeddc0SDimitry Andric     };
91*04eeddc0SDimitry Andric     const auto IsPPConditional = [&](const size_t LineIndex) {
92*04eeddc0SDimitry Andric       const auto &Line = Lines[LineIndex];
93*04eeddc0SDimitry Andric       return Line->First->is(tok::hash) && Line->First->Next &&
94*04eeddc0SDimitry Andric              Line->First->Next->isOneOf(tok::pp_if, tok::pp_ifdef, tok::pp_else,
95*04eeddc0SDimitry Andric                                         tok::pp_ifndef, tok::pp_elifndef,
96*04eeddc0SDimitry Andric                                         tok::pp_elifdef, tok::pp_elif,
97*04eeddc0SDimitry Andric                                         tok::pp_endif);
98*04eeddc0SDimitry Andric     };
99*04eeddc0SDimitry Andric     const auto FollowingOtherOpening = [&]() {
100*04eeddc0SDimitry Andric       return OpeningLineIndex == 0 ||
101*04eeddc0SDimitry Andric              Lines[OpeningLineIndex - 1]->Last->opensScope() ||
102*04eeddc0SDimitry Andric              IsPPConditional(OpeningLineIndex - 1);
103*04eeddc0SDimitry Andric     };
104*04eeddc0SDimitry Andric     const auto HasEnumOnLine = [&]() {
105*04eeddc0SDimitry Andric       FormatToken *CurrentToken = CurrentLine->First;
106*04eeddc0SDimitry Andric       bool FoundEnumKeyword = false;
107*04eeddc0SDimitry Andric       while (CurrentToken) {
108*04eeddc0SDimitry Andric         if (CurrentToken->is(tok::kw_enum))
109*04eeddc0SDimitry Andric           FoundEnumKeyword = true;
110*04eeddc0SDimitry Andric         else if (FoundEnumKeyword && CurrentToken->is(tok::l_brace))
111*04eeddc0SDimitry Andric           return true;
112*04eeddc0SDimitry Andric         CurrentToken = CurrentToken->Next;
113*04eeddc0SDimitry Andric       }
114*04eeddc0SDimitry Andric       return FoundEnumKeyword && I + 1 < Lines.size() &&
115*04eeddc0SDimitry Andric              Lines[I + 1]->First->is(tok::l_brace);
116*04eeddc0SDimitry Andric     };
117*04eeddc0SDimitry Andric 
118*04eeddc0SDimitry Andric     bool IsDefBlock = false;
119*04eeddc0SDimitry Andric     const auto MayPrecedeDefinition = [&](const int Direction = -1) {
120*04eeddc0SDimitry Andric       assert(Direction >= -1);
121*04eeddc0SDimitry Andric       assert(Direction <= 1);
122*04eeddc0SDimitry Andric       const size_t OperateIndex = OpeningLineIndex + Direction;
123*04eeddc0SDimitry Andric       assert(OperateIndex < Lines.size());
124*04eeddc0SDimitry Andric       const auto &OperateLine = Lines[OperateIndex];
125*04eeddc0SDimitry Andric       if (LikelyDefinition(OperateLine))
126*04eeddc0SDimitry Andric         return false;
127*04eeddc0SDimitry Andric 
128*04eeddc0SDimitry Andric       if (OperateLine->First->is(tok::comment))
129*04eeddc0SDimitry Andric         return true;
130*04eeddc0SDimitry Andric 
131*04eeddc0SDimitry Andric       // A single line identifier that is not in the last line.
132*04eeddc0SDimitry Andric       if (OperateLine->First->is(tok::identifier) &&
133*04eeddc0SDimitry Andric           OperateLine->First == OperateLine->Last &&
134*04eeddc0SDimitry Andric           OperateIndex + 1 < Lines.size()) {
135*04eeddc0SDimitry Andric         // UnwrappedLineParser's recognition of free-standing macro like
136*04eeddc0SDimitry Andric         // Q_OBJECT may also recognize some uppercased type names that may be
137*04eeddc0SDimitry Andric         // used as return type as that kind of macros, which is a bit hard to
138*04eeddc0SDimitry Andric         // distinguish one from another purely from token patterns. Here, we
139*04eeddc0SDimitry Andric         // try not to add new lines below those identifiers.
140*04eeddc0SDimitry Andric         AnnotatedLine *NextLine = Lines[OperateIndex + 1];
141*04eeddc0SDimitry Andric         if (NextLine->MightBeFunctionDecl &&
142*04eeddc0SDimitry Andric             NextLine->mightBeFunctionDefinition() &&
143*04eeddc0SDimitry Andric             NextLine->First->NewlinesBefore == 1 &&
144*04eeddc0SDimitry Andric             OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro))
145*04eeddc0SDimitry Andric           return true;
146*04eeddc0SDimitry Andric       }
147*04eeddc0SDimitry Andric 
148*04eeddc0SDimitry Andric       if ((Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare)))
149*04eeddc0SDimitry Andric         return true;
150*04eeddc0SDimitry Andric       return false;
151*04eeddc0SDimitry Andric     };
152*04eeddc0SDimitry Andric 
153*04eeddc0SDimitry Andric     if (HasEnumOnLine() &&
154*04eeddc0SDimitry Andric         !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) {
155*04eeddc0SDimitry Andric       // We have no scope opening/closing information for enum.
156*04eeddc0SDimitry Andric       IsDefBlock = true;
157*04eeddc0SDimitry Andric       OpeningLineIndex = I;
158*04eeddc0SDimitry Andric       while (OpeningLineIndex > 0 && MayPrecedeDefinition())
159*04eeddc0SDimitry Andric         --OpeningLineIndex;
160*04eeddc0SDimitry Andric       OpeningLine = Lines[OpeningLineIndex];
161*04eeddc0SDimitry Andric       TargetLine = OpeningLine;
162*04eeddc0SDimitry Andric       TargetToken = TargetLine->First;
163*04eeddc0SDimitry Andric       if (!FollowingOtherOpening())
164*04eeddc0SDimitry Andric         InsertReplacement(NewlineCount);
165*04eeddc0SDimitry Andric       else if (IsNeverStyle)
166*04eeddc0SDimitry Andric         InsertReplacement(OpeningLineIndex != 0);
167*04eeddc0SDimitry Andric       TargetLine = CurrentLine;
168*04eeddc0SDimitry Andric       TargetToken = TargetLine->First;
169*04eeddc0SDimitry Andric       while (TargetToken && !TargetToken->is(tok::r_brace))
170*04eeddc0SDimitry Andric         TargetToken = TargetToken->Next;
171*04eeddc0SDimitry Andric       if (!TargetToken) {
172*04eeddc0SDimitry Andric         while (I < Lines.size() && !Lines[I]->First->is(tok::r_brace))
173*04eeddc0SDimitry Andric           ++I;
174*04eeddc0SDimitry Andric       }
175*04eeddc0SDimitry Andric     } else if (CurrentLine->First->closesScope()) {
176*04eeddc0SDimitry Andric       if (OpeningLineIndex > Lines.size())
177*04eeddc0SDimitry Andric         continue;
178*04eeddc0SDimitry Andric       // Handling the case that opening brace has its own line, with checking
179*04eeddc0SDimitry Andric       // whether the last line already had an opening brace to guard against
180*04eeddc0SDimitry Andric       // misrecognition.
181*04eeddc0SDimitry Andric       if (OpeningLineIndex > 0 &&
182*04eeddc0SDimitry Andric           Lines[OpeningLineIndex]->First->is(tok::l_brace) &&
183*04eeddc0SDimitry Andric           Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace))
184*04eeddc0SDimitry Andric         --OpeningLineIndex;
185*04eeddc0SDimitry Andric       OpeningLine = Lines[OpeningLineIndex];
186*04eeddc0SDimitry Andric       // Closing a function definition.
187*04eeddc0SDimitry Andric       if (LikelyDefinition(OpeningLine)) {
188*04eeddc0SDimitry Andric         IsDefBlock = true;
189*04eeddc0SDimitry Andric         while (OpeningLineIndex > 0 && MayPrecedeDefinition())
190*04eeddc0SDimitry Andric           --OpeningLineIndex;
191*04eeddc0SDimitry Andric         OpeningLine = Lines[OpeningLineIndex];
192*04eeddc0SDimitry Andric         TargetLine = OpeningLine;
193*04eeddc0SDimitry Andric         TargetToken = TargetLine->First;
194*04eeddc0SDimitry Andric         if (!FollowingOtherOpening()) {
195*04eeddc0SDimitry Andric           // Avoid duplicated replacement.
196*04eeddc0SDimitry Andric           if (TargetToken->isNot(tok::l_brace))
197*04eeddc0SDimitry Andric             InsertReplacement(NewlineCount);
198*04eeddc0SDimitry Andric         } else if (IsNeverStyle)
199*04eeddc0SDimitry Andric           InsertReplacement(OpeningLineIndex != 0);
200*04eeddc0SDimitry Andric       }
201*04eeddc0SDimitry Andric     }
202*04eeddc0SDimitry Andric 
203*04eeddc0SDimitry Andric     // Not the last token.
204*04eeddc0SDimitry Andric     if (IsDefBlock && I + 1 < Lines.size()) {
205*04eeddc0SDimitry Andric       OpeningLineIndex = I + 1;
206*04eeddc0SDimitry Andric       TargetLine = Lines[OpeningLineIndex];
207*04eeddc0SDimitry Andric       TargetToken = TargetLine->First;
208*04eeddc0SDimitry Andric 
209*04eeddc0SDimitry Andric       // No empty line for continuously closing scopes. The token will be
210*04eeddc0SDimitry Andric       // handled in another case if the line following is opening a
211*04eeddc0SDimitry Andric       // definition.
212*04eeddc0SDimitry Andric       if (!TargetToken->closesScope() && !IsPPConditional(OpeningLineIndex)) {
213*04eeddc0SDimitry Andric         // Check whether current line may precede a definition line.
214*04eeddc0SDimitry Andric         while (OpeningLineIndex + 1 < Lines.size() &&
215*04eeddc0SDimitry Andric                MayPrecedeDefinition(/*Direction=*/0))
216*04eeddc0SDimitry Andric           ++OpeningLineIndex;
217*04eeddc0SDimitry Andric         TargetLine = Lines[OpeningLineIndex];
218*04eeddc0SDimitry Andric         if (!LikelyDefinition(TargetLine)) {
219*04eeddc0SDimitry Andric           OpeningLineIndex = I + 1;
220*04eeddc0SDimitry Andric           TargetLine = Lines[I + 1];
221*04eeddc0SDimitry Andric           TargetToken = TargetLine->First;
222*04eeddc0SDimitry Andric           InsertReplacement(NewlineCount);
223*04eeddc0SDimitry Andric         }
224*04eeddc0SDimitry Andric       } else if (IsNeverStyle)
225*04eeddc0SDimitry Andric         InsertReplacement(/*NewlineToInsert=*/1);
226*04eeddc0SDimitry Andric     }
227*04eeddc0SDimitry Andric   }
228*04eeddc0SDimitry Andric   for (const auto &R : Whitespaces.generateReplacements())
229*04eeddc0SDimitry Andric     // The add method returns an Error instance which simulates program exit
230*04eeddc0SDimitry Andric     // code through overloading boolean operator, thus false here indicates
231*04eeddc0SDimitry Andric     // success.
232*04eeddc0SDimitry Andric     if (Result.add(R))
233*04eeddc0SDimitry Andric       return;
234*04eeddc0SDimitry Andric }
235*04eeddc0SDimitry Andric } // namespace format
236*04eeddc0SDimitry Andric } // namespace clang
237