1*04eeddc0SDimitry Andric //===--- DefinitionBlockSeparator.cpp ---------------------------*- C++ -*-===// 2*04eeddc0SDimitry Andric // 3*04eeddc0SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*04eeddc0SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*04eeddc0SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*04eeddc0SDimitry Andric // 7*04eeddc0SDimitry Andric //===----------------------------------------------------------------------===// 8*04eeddc0SDimitry Andric /// 9*04eeddc0SDimitry Andric /// \file 10*04eeddc0SDimitry Andric /// This file implements DefinitionBlockSeparator, a TokenAnalyzer that inserts 11*04eeddc0SDimitry Andric /// or removes empty lines separating definition blocks like classes, structs, 12*04eeddc0SDimitry Andric /// functions, enums, and namespaces in between. 13*04eeddc0SDimitry Andric /// 14*04eeddc0SDimitry Andric //===----------------------------------------------------------------------===// 15*04eeddc0SDimitry Andric 16*04eeddc0SDimitry Andric #include "DefinitionBlockSeparator.h" 17*04eeddc0SDimitry Andric #include "llvm/Support/Debug.h" 18*04eeddc0SDimitry Andric #define DEBUG_TYPE "definition-block-separator" 19*04eeddc0SDimitry Andric 20*04eeddc0SDimitry Andric namespace clang { 21*04eeddc0SDimitry Andric namespace format { 22*04eeddc0SDimitry Andric std::pair<tooling::Replacements, unsigned> DefinitionBlockSeparator::analyze( 23*04eeddc0SDimitry Andric TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 24*04eeddc0SDimitry Andric FormatTokenLexer &Tokens) { 25*04eeddc0SDimitry Andric assert(Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave); 26*04eeddc0SDimitry Andric AffectedRangeMgr.computeAffectedLines(AnnotatedLines); 27*04eeddc0SDimitry Andric tooling::Replacements Result; 28*04eeddc0SDimitry Andric separateBlocks(AnnotatedLines, Result, Tokens); 29*04eeddc0SDimitry Andric return {Result, 0}; 30*04eeddc0SDimitry Andric } 31*04eeddc0SDimitry Andric 32*04eeddc0SDimitry Andric void DefinitionBlockSeparator::separateBlocks( 33*04eeddc0SDimitry Andric SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result, 34*04eeddc0SDimitry Andric FormatTokenLexer &Tokens) { 35*04eeddc0SDimitry Andric const bool IsNeverStyle = 36*04eeddc0SDimitry Andric Style.SeparateDefinitionBlocks == FormatStyle::SDS_Never; 37*04eeddc0SDimitry Andric const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords(); 38*04eeddc0SDimitry Andric auto LikelyDefinition = [this, ExtraKeywords](const AnnotatedLine *Line, 39*04eeddc0SDimitry Andric bool ExcludeEnum = false) { 40*04eeddc0SDimitry Andric if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) || 41*04eeddc0SDimitry Andric Line->startsWithNamespace()) 42*04eeddc0SDimitry Andric return true; 43*04eeddc0SDimitry Andric FormatToken *CurrentToken = Line->First; 44*04eeddc0SDimitry Andric while (CurrentToken) { 45*04eeddc0SDimitry Andric if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct) || 46*04eeddc0SDimitry Andric (Style.isJavaScript() && CurrentToken->is(ExtraKeywords.kw_function))) 47*04eeddc0SDimitry Andric return true; 48*04eeddc0SDimitry Andric if (!ExcludeEnum && CurrentToken->is(tok::kw_enum)) 49*04eeddc0SDimitry Andric return true; 50*04eeddc0SDimitry Andric CurrentToken = CurrentToken->Next; 51*04eeddc0SDimitry Andric } 52*04eeddc0SDimitry Andric return false; 53*04eeddc0SDimitry Andric }; 54*04eeddc0SDimitry Andric unsigned NewlineCount = 55*04eeddc0SDimitry Andric (Style.SeparateDefinitionBlocks == FormatStyle::SDS_Always ? 1 : 0) + 1; 56*04eeddc0SDimitry Andric WhitespaceManager Whitespaces( 57*04eeddc0SDimitry Andric Env.getSourceManager(), Style, 58*04eeddc0SDimitry Andric Style.DeriveLineEnding 59*04eeddc0SDimitry Andric ? WhitespaceManager::inputUsesCRLF( 60*04eeddc0SDimitry Andric Env.getSourceManager().getBufferData(Env.getFileID()), 61*04eeddc0SDimitry Andric Style.UseCRLF) 62*04eeddc0SDimitry Andric : Style.UseCRLF); 63*04eeddc0SDimitry Andric for (unsigned I = 0; I < Lines.size(); ++I) { 64*04eeddc0SDimitry Andric const auto &CurrentLine = Lines[I]; 65*04eeddc0SDimitry Andric if (CurrentLine->InPPDirective) 66*04eeddc0SDimitry Andric continue; 67*04eeddc0SDimitry Andric FormatToken *TargetToken = nullptr; 68*04eeddc0SDimitry Andric AnnotatedLine *TargetLine; 69*04eeddc0SDimitry Andric auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex; 70*04eeddc0SDimitry Andric AnnotatedLine *OpeningLine = nullptr; 71*04eeddc0SDimitry Andric const auto IsAccessSpecifierToken = [](const FormatToken *Token) { 72*04eeddc0SDimitry Andric return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier(); 73*04eeddc0SDimitry Andric }; 74*04eeddc0SDimitry Andric const auto InsertReplacement = [&](const int NewlineToInsert) { 75*04eeddc0SDimitry Andric assert(TargetLine); 76*04eeddc0SDimitry Andric assert(TargetToken); 77*04eeddc0SDimitry Andric 78*04eeddc0SDimitry Andric // Do not handle EOF newlines. 79*04eeddc0SDimitry Andric if (TargetToken->is(tok::eof)) 80*04eeddc0SDimitry Andric return; 81*04eeddc0SDimitry Andric if (IsAccessSpecifierToken(TargetToken) || 82*04eeddc0SDimitry Andric (OpeningLineIndex > 0 && 83*04eeddc0SDimitry Andric IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First))) 84*04eeddc0SDimitry Andric return; 85*04eeddc0SDimitry Andric if (!TargetLine->Affected) 86*04eeddc0SDimitry Andric return; 87*04eeddc0SDimitry Andric Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert, 88*04eeddc0SDimitry Andric TargetToken->OriginalColumn, 89*04eeddc0SDimitry Andric TargetToken->OriginalColumn); 90*04eeddc0SDimitry Andric }; 91*04eeddc0SDimitry Andric const auto IsPPConditional = [&](const size_t LineIndex) { 92*04eeddc0SDimitry Andric const auto &Line = Lines[LineIndex]; 93*04eeddc0SDimitry Andric return Line->First->is(tok::hash) && Line->First->Next && 94*04eeddc0SDimitry Andric Line->First->Next->isOneOf(tok::pp_if, tok::pp_ifdef, tok::pp_else, 95*04eeddc0SDimitry Andric tok::pp_ifndef, tok::pp_elifndef, 96*04eeddc0SDimitry Andric tok::pp_elifdef, tok::pp_elif, 97*04eeddc0SDimitry Andric tok::pp_endif); 98*04eeddc0SDimitry Andric }; 99*04eeddc0SDimitry Andric const auto FollowingOtherOpening = [&]() { 100*04eeddc0SDimitry Andric return OpeningLineIndex == 0 || 101*04eeddc0SDimitry Andric Lines[OpeningLineIndex - 1]->Last->opensScope() || 102*04eeddc0SDimitry Andric IsPPConditional(OpeningLineIndex - 1); 103*04eeddc0SDimitry Andric }; 104*04eeddc0SDimitry Andric const auto HasEnumOnLine = [&]() { 105*04eeddc0SDimitry Andric FormatToken *CurrentToken = CurrentLine->First; 106*04eeddc0SDimitry Andric bool FoundEnumKeyword = false; 107*04eeddc0SDimitry Andric while (CurrentToken) { 108*04eeddc0SDimitry Andric if (CurrentToken->is(tok::kw_enum)) 109*04eeddc0SDimitry Andric FoundEnumKeyword = true; 110*04eeddc0SDimitry Andric else if (FoundEnumKeyword && CurrentToken->is(tok::l_brace)) 111*04eeddc0SDimitry Andric return true; 112*04eeddc0SDimitry Andric CurrentToken = CurrentToken->Next; 113*04eeddc0SDimitry Andric } 114*04eeddc0SDimitry Andric return FoundEnumKeyword && I + 1 < Lines.size() && 115*04eeddc0SDimitry Andric Lines[I + 1]->First->is(tok::l_brace); 116*04eeddc0SDimitry Andric }; 117*04eeddc0SDimitry Andric 118*04eeddc0SDimitry Andric bool IsDefBlock = false; 119*04eeddc0SDimitry Andric const auto MayPrecedeDefinition = [&](const int Direction = -1) { 120*04eeddc0SDimitry Andric assert(Direction >= -1); 121*04eeddc0SDimitry Andric assert(Direction <= 1); 122*04eeddc0SDimitry Andric const size_t OperateIndex = OpeningLineIndex + Direction; 123*04eeddc0SDimitry Andric assert(OperateIndex < Lines.size()); 124*04eeddc0SDimitry Andric const auto &OperateLine = Lines[OperateIndex]; 125*04eeddc0SDimitry Andric if (LikelyDefinition(OperateLine)) 126*04eeddc0SDimitry Andric return false; 127*04eeddc0SDimitry Andric 128*04eeddc0SDimitry Andric if (OperateLine->First->is(tok::comment)) 129*04eeddc0SDimitry Andric return true; 130*04eeddc0SDimitry Andric 131*04eeddc0SDimitry Andric // A single line identifier that is not in the last line. 132*04eeddc0SDimitry Andric if (OperateLine->First->is(tok::identifier) && 133*04eeddc0SDimitry Andric OperateLine->First == OperateLine->Last && 134*04eeddc0SDimitry Andric OperateIndex + 1 < Lines.size()) { 135*04eeddc0SDimitry Andric // UnwrappedLineParser's recognition of free-standing macro like 136*04eeddc0SDimitry Andric // Q_OBJECT may also recognize some uppercased type names that may be 137*04eeddc0SDimitry Andric // used as return type as that kind of macros, which is a bit hard to 138*04eeddc0SDimitry Andric // distinguish one from another purely from token patterns. Here, we 139*04eeddc0SDimitry Andric // try not to add new lines below those identifiers. 140*04eeddc0SDimitry Andric AnnotatedLine *NextLine = Lines[OperateIndex + 1]; 141*04eeddc0SDimitry Andric if (NextLine->MightBeFunctionDecl && 142*04eeddc0SDimitry Andric NextLine->mightBeFunctionDefinition() && 143*04eeddc0SDimitry Andric NextLine->First->NewlinesBefore == 1 && 144*04eeddc0SDimitry Andric OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro)) 145*04eeddc0SDimitry Andric return true; 146*04eeddc0SDimitry Andric } 147*04eeddc0SDimitry Andric 148*04eeddc0SDimitry Andric if ((Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare))) 149*04eeddc0SDimitry Andric return true; 150*04eeddc0SDimitry Andric return false; 151*04eeddc0SDimitry Andric }; 152*04eeddc0SDimitry Andric 153*04eeddc0SDimitry Andric if (HasEnumOnLine() && 154*04eeddc0SDimitry Andric !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) { 155*04eeddc0SDimitry Andric // We have no scope opening/closing information for enum. 156*04eeddc0SDimitry Andric IsDefBlock = true; 157*04eeddc0SDimitry Andric OpeningLineIndex = I; 158*04eeddc0SDimitry Andric while (OpeningLineIndex > 0 && MayPrecedeDefinition()) 159*04eeddc0SDimitry Andric --OpeningLineIndex; 160*04eeddc0SDimitry Andric OpeningLine = Lines[OpeningLineIndex]; 161*04eeddc0SDimitry Andric TargetLine = OpeningLine; 162*04eeddc0SDimitry Andric TargetToken = TargetLine->First; 163*04eeddc0SDimitry Andric if (!FollowingOtherOpening()) 164*04eeddc0SDimitry Andric InsertReplacement(NewlineCount); 165*04eeddc0SDimitry Andric else if (IsNeverStyle) 166*04eeddc0SDimitry Andric InsertReplacement(OpeningLineIndex != 0); 167*04eeddc0SDimitry Andric TargetLine = CurrentLine; 168*04eeddc0SDimitry Andric TargetToken = TargetLine->First; 169*04eeddc0SDimitry Andric while (TargetToken && !TargetToken->is(tok::r_brace)) 170*04eeddc0SDimitry Andric TargetToken = TargetToken->Next; 171*04eeddc0SDimitry Andric if (!TargetToken) { 172*04eeddc0SDimitry Andric while (I < Lines.size() && !Lines[I]->First->is(tok::r_brace)) 173*04eeddc0SDimitry Andric ++I; 174*04eeddc0SDimitry Andric } 175*04eeddc0SDimitry Andric } else if (CurrentLine->First->closesScope()) { 176*04eeddc0SDimitry Andric if (OpeningLineIndex > Lines.size()) 177*04eeddc0SDimitry Andric continue; 178*04eeddc0SDimitry Andric // Handling the case that opening brace has its own line, with checking 179*04eeddc0SDimitry Andric // whether the last line already had an opening brace to guard against 180*04eeddc0SDimitry Andric // misrecognition. 181*04eeddc0SDimitry Andric if (OpeningLineIndex > 0 && 182*04eeddc0SDimitry Andric Lines[OpeningLineIndex]->First->is(tok::l_brace) && 183*04eeddc0SDimitry Andric Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace)) 184*04eeddc0SDimitry Andric --OpeningLineIndex; 185*04eeddc0SDimitry Andric OpeningLine = Lines[OpeningLineIndex]; 186*04eeddc0SDimitry Andric // Closing a function definition. 187*04eeddc0SDimitry Andric if (LikelyDefinition(OpeningLine)) { 188*04eeddc0SDimitry Andric IsDefBlock = true; 189*04eeddc0SDimitry Andric while (OpeningLineIndex > 0 && MayPrecedeDefinition()) 190*04eeddc0SDimitry Andric --OpeningLineIndex; 191*04eeddc0SDimitry Andric OpeningLine = Lines[OpeningLineIndex]; 192*04eeddc0SDimitry Andric TargetLine = OpeningLine; 193*04eeddc0SDimitry Andric TargetToken = TargetLine->First; 194*04eeddc0SDimitry Andric if (!FollowingOtherOpening()) { 195*04eeddc0SDimitry Andric // Avoid duplicated replacement. 196*04eeddc0SDimitry Andric if (TargetToken->isNot(tok::l_brace)) 197*04eeddc0SDimitry Andric InsertReplacement(NewlineCount); 198*04eeddc0SDimitry Andric } else if (IsNeverStyle) 199*04eeddc0SDimitry Andric InsertReplacement(OpeningLineIndex != 0); 200*04eeddc0SDimitry Andric } 201*04eeddc0SDimitry Andric } 202*04eeddc0SDimitry Andric 203*04eeddc0SDimitry Andric // Not the last token. 204*04eeddc0SDimitry Andric if (IsDefBlock && I + 1 < Lines.size()) { 205*04eeddc0SDimitry Andric OpeningLineIndex = I + 1; 206*04eeddc0SDimitry Andric TargetLine = Lines[OpeningLineIndex]; 207*04eeddc0SDimitry Andric TargetToken = TargetLine->First; 208*04eeddc0SDimitry Andric 209*04eeddc0SDimitry Andric // No empty line for continuously closing scopes. The token will be 210*04eeddc0SDimitry Andric // handled in another case if the line following is opening a 211*04eeddc0SDimitry Andric // definition. 212*04eeddc0SDimitry Andric if (!TargetToken->closesScope() && !IsPPConditional(OpeningLineIndex)) { 213*04eeddc0SDimitry Andric // Check whether current line may precede a definition line. 214*04eeddc0SDimitry Andric while (OpeningLineIndex + 1 < Lines.size() && 215*04eeddc0SDimitry Andric MayPrecedeDefinition(/*Direction=*/0)) 216*04eeddc0SDimitry Andric ++OpeningLineIndex; 217*04eeddc0SDimitry Andric TargetLine = Lines[OpeningLineIndex]; 218*04eeddc0SDimitry Andric if (!LikelyDefinition(TargetLine)) { 219*04eeddc0SDimitry Andric OpeningLineIndex = I + 1; 220*04eeddc0SDimitry Andric TargetLine = Lines[I + 1]; 221*04eeddc0SDimitry Andric TargetToken = TargetLine->First; 222*04eeddc0SDimitry Andric InsertReplacement(NewlineCount); 223*04eeddc0SDimitry Andric } 224*04eeddc0SDimitry Andric } else if (IsNeverStyle) 225*04eeddc0SDimitry Andric InsertReplacement(/*NewlineToInsert=*/1); 226*04eeddc0SDimitry Andric } 227*04eeddc0SDimitry Andric } 228*04eeddc0SDimitry Andric for (const auto &R : Whitespaces.generateReplacements()) 229*04eeddc0SDimitry Andric // The add method returns an Error instance which simulates program exit 230*04eeddc0SDimitry Andric // code through overloading boolean operator, thus false here indicates 231*04eeddc0SDimitry Andric // success. 232*04eeddc0SDimitry Andric if (Result.add(R)) 233*04eeddc0SDimitry Andric return; 234*04eeddc0SDimitry Andric } 235*04eeddc0SDimitry Andric } // namespace format 236*04eeddc0SDimitry Andric } // namespace clang 237