10b57cec5SDimitry Andric //===--- FormatToken.cpp - Format C++ code --------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// This file implements specific functions of \c FormatTokens and their 110b57cec5SDimitry Andric /// roles. 120b57cec5SDimitry Andric /// 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "FormatToken.h" 160b57cec5SDimitry Andric #include "ContinuationIndenter.h" 170b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 180b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 190b57cec5SDimitry Andric #include <climits> 200b57cec5SDimitry Andric 210b57cec5SDimitry Andric namespace clang { 220b57cec5SDimitry Andric namespace format { 230b57cec5SDimitry Andric 240b57cec5SDimitry Andric const char *getTokenTypeName(TokenType Type) { 250b57cec5SDimitry Andric static const char *const TokNames[] = { 260b57cec5SDimitry Andric #define TYPE(X) #X, 270b57cec5SDimitry Andric LIST_TOKEN_TYPES 280b57cec5SDimitry Andric #undef TYPE 290b57cec5SDimitry Andric nullptr}; 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric if (Type < NUM_TOKEN_TYPES) 320b57cec5SDimitry Andric return TokNames[Type]; 330b57cec5SDimitry Andric llvm_unreachable("unknown TokenType"); 340b57cec5SDimitry Andric return nullptr; 350b57cec5SDimitry Andric } 360b57cec5SDimitry Andric 37*0fca6ea1SDimitry Andric // Sorted common C++ non-keyword types. 38*0fca6ea1SDimitry Andric static SmallVector<StringRef> CppNonKeywordTypes = { 39*0fca6ea1SDimitry Andric "clock_t", "int16_t", "int32_t", "int64_t", "int8_t", 40*0fca6ea1SDimitry Andric "intptr_t", "ptrdiff_t", "size_t", "time_t", "uint16_t", 41*0fca6ea1SDimitry Andric "uint32_t", "uint64_t", "uint8_t", "uintptr_t", 42*0fca6ea1SDimitry Andric }; 43*0fca6ea1SDimitry Andric 44*0fca6ea1SDimitry Andric bool FormatToken::isTypeName(const LangOptions &LangOpts) const { 45*0fca6ea1SDimitry Andric const bool IsCpp = LangOpts.CXXOperatorNames; 46*0fca6ea1SDimitry Andric return is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) || 47*0fca6ea1SDimitry Andric (IsCpp && is(tok::identifier) && 48*0fca6ea1SDimitry Andric std::binary_search(CppNonKeywordTypes.begin(), 49*0fca6ea1SDimitry Andric CppNonKeywordTypes.end(), TokenText)); 500b57cec5SDimitry Andric } 510b57cec5SDimitry Andric 52*0fca6ea1SDimitry Andric bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const { 53*0fca6ea1SDimitry Andric return isTypeName(LangOpts) || isOneOf(tok::kw_auto, tok::identifier); 540eae32dcSDimitry Andric } 550eae32dcSDimitry Andric 5606c3fb27SDimitry Andric bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { 5706c3fb27SDimitry Andric assert(is(tok::r_brace)); 5806c3fb27SDimitry Andric if (!Style.Cpp11BracedListStyle || 5906c3fb27SDimitry Andric Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { 6006c3fb27SDimitry Andric return false; 6106c3fb27SDimitry Andric } 6206c3fb27SDimitry Andric const auto *LBrace = MatchingParen; 6306c3fb27SDimitry Andric assert(LBrace && LBrace->is(tok::l_brace)); 6406c3fb27SDimitry Andric if (LBrace->is(BK_BracedInit)) 6506c3fb27SDimitry Andric return true; 6606c3fb27SDimitry Andric if (LBrace->Previous && LBrace->Previous->is(tok::equal)) 6706c3fb27SDimitry Andric return true; 6806c3fb27SDimitry Andric return false; 6906c3fb27SDimitry Andric } 7006c3fb27SDimitry Andric 7181ad6265SDimitry Andric bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const { 7281ad6265SDimitry Andric // C# Does not indent object initialisers as continuations. 7381ad6265SDimitry Andric if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp()) 7481ad6265SDimitry Andric return true; 7581ad6265SDimitry Andric if (is(TT_TemplateString) && opensScope()) 7681ad6265SDimitry Andric return true; 7781ad6265SDimitry Andric return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || 7881ad6265SDimitry Andric (is(tok::l_brace) && 7981ad6265SDimitry Andric (getBlockKind() == BK_Block || is(TT_DictLiteral) || 8081ad6265SDimitry Andric (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || 815f757f3fSDimitry Andric (is(tok::less) && Style.isProto()); 8281ad6265SDimitry Andric } 8381ad6265SDimitry Andric 840b57cec5SDimitry Andric TokenRole::~TokenRole() {} 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} 870b57cec5SDimitry Andric 880b57cec5SDimitry Andric unsigned CommaSeparatedList::formatAfterToken(LineState &State, 890b57cec5SDimitry Andric ContinuationIndenter *Indenter, 900b57cec5SDimitry Andric bool DryRun) { 9106c3fb27SDimitry Andric if (!State.NextToken || !State.NextToken->Previous) 920b57cec5SDimitry Andric return 0; 930b57cec5SDimitry Andric 94297eecfbSDimitry Andric if (Formats.size() <= 1) 95297eecfbSDimitry Andric return 0; // Handled by formatFromToken (1) or avoid severe penalty (0). 960b57cec5SDimitry Andric 970b57cec5SDimitry Andric // Ensure that we start on the opening brace. 980b57cec5SDimitry Andric const FormatToken *LBrace = 990b57cec5SDimitry Andric State.NextToken->Previous->getPreviousNonComment(); 1000b57cec5SDimitry Andric if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || 101e8d8bef9SDimitry Andric LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) || 10281ad6265SDimitry Andric LBrace->Next->is(TT_DesignatedInitializerPeriod)) { 1030b57cec5SDimitry Andric return 0; 10481ad6265SDimitry Andric } 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric // Calculate the number of code points we have to format this list. As the 1070b57cec5SDimitry Andric // first token is already placed, we have to subtract it. 1080b57cec5SDimitry Andric unsigned RemainingCodePoints = 1090b57cec5SDimitry Andric Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric // Find the best ColumnFormat, i.e. the best number of columns to use. 1120b57cec5SDimitry Andric const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric // If no ColumnFormat can be used, the braced list would generally be 1150b57cec5SDimitry Andric // bin-packed. Add a severe penalty to this so that column layouts are 1160b57cec5SDimitry Andric // preferred if possible. 1170b57cec5SDimitry Andric if (!Format) 118*0fca6ea1SDimitry Andric return 10'000; 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric // Format the entire list. 1210b57cec5SDimitry Andric unsigned Penalty = 0; 1220b57cec5SDimitry Andric unsigned Column = 0; 1230b57cec5SDimitry Andric unsigned Item = 0; 1240b57cec5SDimitry Andric while (State.NextToken != LBrace->MatchingParen) { 1250b57cec5SDimitry Andric bool NewLine = false; 1260b57cec5SDimitry Andric unsigned ExtraSpaces = 0; 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric // If the previous token was one of our commas, we are now on the next item. 1290b57cec5SDimitry Andric if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { 1300b57cec5SDimitry Andric if (!State.NextToken->isTrailingComment()) { 1310b57cec5SDimitry Andric ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; 1320b57cec5SDimitry Andric ++Column; 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric ++Item; 1350b57cec5SDimitry Andric } 1360b57cec5SDimitry Andric 1370b57cec5SDimitry Andric if (Column == Format->Columns || State.NextToken->MustBreakBefore) { 1380b57cec5SDimitry Andric Column = 0; 1390b57cec5SDimitry Andric NewLine = true; 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric // Place token using the continuation indenter and store the penalty. 1430b57cec5SDimitry Andric Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); 1440b57cec5SDimitry Andric } 1450b57cec5SDimitry Andric return Penalty; 1460b57cec5SDimitry Andric } 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric unsigned CommaSeparatedList::formatFromToken(LineState &State, 1490b57cec5SDimitry Andric ContinuationIndenter *Indenter, 1500b57cec5SDimitry Andric bool DryRun) { 1510b57cec5SDimitry Andric // Formatting with 1 Column isn't really a column layout, so we don't need the 1520b57cec5SDimitry Andric // special logic here. We can just avoid bin packing any of the parameters. 1530b57cec5SDimitry Andric if (Formats.size() == 1 || HasNestedBracedList) 1540b57cec5SDimitry Andric State.Stack.back().AvoidBinPacking = true; 1550b57cec5SDimitry Andric return 0; 1560b57cec5SDimitry Andric } 1570b57cec5SDimitry Andric 1580b57cec5SDimitry Andric // Returns the lengths in code points between Begin and End (both included), 1590b57cec5SDimitry Andric // assuming that the entire sequence is put on a single line. 1600b57cec5SDimitry Andric static unsigned CodePointsBetween(const FormatToken *Begin, 1610b57cec5SDimitry Andric const FormatToken *End) { 1620b57cec5SDimitry Andric assert(End->TotalLength >= Begin->TotalLength); 1630b57cec5SDimitry Andric return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; 1640b57cec5SDimitry Andric } 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { 1670b57cec5SDimitry Andric // FIXME: At some point we might want to do this for other lists, too. 1680b57cec5SDimitry Andric if (!Token->MatchingParen || 16981ad6265SDimitry Andric !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { 1700b57cec5SDimitry Andric return; 17181ad6265SDimitry Andric } 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric // In C++11 braced list style, we should not format in columns unless they 1740b57cec5SDimitry Andric // have many items (20 or more) or we allow bin-packing of function call 1750b57cec5SDimitry Andric // arguments. 1760b57cec5SDimitry Andric if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && 17781ad6265SDimitry Andric Commas.size() < 19) { 1780b57cec5SDimitry Andric return; 17981ad6265SDimitry Andric } 1800b57cec5SDimitry Andric 1810b57cec5SDimitry Andric // Limit column layout for JavaScript array initializers to 20 or more items 1820b57cec5SDimitry Andric // for now to introduce it carefully. We can become more aggressive if this 1830b57cec5SDimitry Andric // necessary. 1840b57cec5SDimitry Andric if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19) 1850b57cec5SDimitry Andric return; 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric // Column format doesn't really make sense if we don't align after brackets. 1880b57cec5SDimitry Andric if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) 1890b57cec5SDimitry Andric return; 1900b57cec5SDimitry Andric 1910b57cec5SDimitry Andric FormatToken *ItemBegin = Token->Next; 1920b57cec5SDimitry Andric while (ItemBegin->isTrailingComment()) 1930b57cec5SDimitry Andric ItemBegin = ItemBegin->Next; 1940b57cec5SDimitry Andric SmallVector<bool, 8> MustBreakBeforeItem; 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric // The lengths of an item if it is put at the end of the line. This includes 1970b57cec5SDimitry Andric // trailing comments which are otherwise ignored for column alignment. 1980b57cec5SDimitry Andric SmallVector<unsigned, 8> EndOfLineItemLength; 19981ad6265SDimitry Andric MustBreakBeforeItem.reserve(Commas.size() + 1); 20081ad6265SDimitry Andric EndOfLineItemLength.reserve(Commas.size() + 1); 20181ad6265SDimitry Andric ItemLengths.reserve(Commas.size() + 1); 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric bool HasSeparatingComment = false; 2040b57cec5SDimitry Andric for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { 20504eeddc0SDimitry Andric assert(ItemBegin); 2060b57cec5SDimitry Andric // Skip comments on their own line. 2070b57cec5SDimitry Andric while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { 2080b57cec5SDimitry Andric ItemBegin = ItemBegin->Next; 2090b57cec5SDimitry Andric HasSeparatingComment = i > 0; 2100b57cec5SDimitry Andric } 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); 2130b57cec5SDimitry Andric if (ItemBegin->is(tok::l_brace)) 2140b57cec5SDimitry Andric HasNestedBracedList = true; 2150b57cec5SDimitry Andric const FormatToken *ItemEnd = nullptr; 2160b57cec5SDimitry Andric if (i == Commas.size()) { 2170b57cec5SDimitry Andric ItemEnd = Token->MatchingParen; 2180b57cec5SDimitry Andric const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); 2190b57cec5SDimitry Andric ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); 2200b57cec5SDimitry Andric if (Style.Cpp11BracedListStyle && 2210b57cec5SDimitry Andric !ItemEnd->Previous->isTrailingComment()) { 2220b57cec5SDimitry Andric // In Cpp11 braced list style, the } and possibly other subsequent 2230b57cec5SDimitry Andric // tokens will need to stay on a line with the last element. 2240b57cec5SDimitry Andric while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) 2250b57cec5SDimitry Andric ItemEnd = ItemEnd->Next; 2260b57cec5SDimitry Andric } else { 2270b57cec5SDimitry Andric // In other braced lists styles, the "}" can be wrapped to the new line. 2280b57cec5SDimitry Andric ItemEnd = Token->MatchingParen->Previous; 2290b57cec5SDimitry Andric } 2300b57cec5SDimitry Andric } else { 2310b57cec5SDimitry Andric ItemEnd = Commas[i]; 2320b57cec5SDimitry Andric // The comma is counted as part of the item when calculating the length. 2330b57cec5SDimitry Andric ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); 2340b57cec5SDimitry Andric 2350b57cec5SDimitry Andric // Consume trailing comments so the are included in EndOfLineItemLength. 2360b57cec5SDimitry Andric if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && 23781ad6265SDimitry Andric ItemEnd->Next->isTrailingComment()) { 2380b57cec5SDimitry Andric ItemEnd = ItemEnd->Next; 2390b57cec5SDimitry Andric } 24081ad6265SDimitry Andric } 2410b57cec5SDimitry Andric EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); 2420b57cec5SDimitry Andric // If there is a trailing comma in the list, the next item will start at the 2430b57cec5SDimitry Andric // closing brace. Don't create an extra item for this. 2440b57cec5SDimitry Andric if (ItemEnd->getNextNonComment() == Token->MatchingParen) 2450b57cec5SDimitry Andric break; 2460b57cec5SDimitry Andric ItemBegin = ItemEnd->Next; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric // Don't use column layout for lists with few elements and in presence of 2500b57cec5SDimitry Andric // separating comments. 2510b57cec5SDimitry Andric if (Commas.size() < 5 || HasSeparatingComment) 2520b57cec5SDimitry Andric return; 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19) 2550b57cec5SDimitry Andric return; 2560b57cec5SDimitry Andric 2570b57cec5SDimitry Andric // We can never place more than ColumnLimit / 3 items in a row (because of the 2580b57cec5SDimitry Andric // spaces and the comma). 2590b57cec5SDimitry Andric unsigned MaxItems = Style.ColumnLimit / 3; 260753f127fSDimitry Andric SmallVector<unsigned> MinSizeInColumn; 2610b57cec5SDimitry Andric MinSizeInColumn.reserve(MaxItems); 2620b57cec5SDimitry Andric for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { 2630b57cec5SDimitry Andric ColumnFormat Format; 2640b57cec5SDimitry Andric Format.Columns = Columns; 2650b57cec5SDimitry Andric Format.ColumnSizes.resize(Columns); 2660b57cec5SDimitry Andric MinSizeInColumn.assign(Columns, UINT_MAX); 2670b57cec5SDimitry Andric Format.LineCount = 1; 2680b57cec5SDimitry Andric bool HasRowWithSufficientColumns = false; 2690b57cec5SDimitry Andric unsigned Column = 0; 2700b57cec5SDimitry Andric for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { 2710b57cec5SDimitry Andric assert(i < MustBreakBeforeItem.size()); 2720b57cec5SDimitry Andric if (MustBreakBeforeItem[i] || Column == Columns) { 2730b57cec5SDimitry Andric ++Format.LineCount; 2740b57cec5SDimitry Andric Column = 0; 2750b57cec5SDimitry Andric } 2760b57cec5SDimitry Andric if (Column == Columns - 1) 2770b57cec5SDimitry Andric HasRowWithSufficientColumns = true; 2780b57cec5SDimitry Andric unsigned Length = 2790b57cec5SDimitry Andric (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; 2800b57cec5SDimitry Andric Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); 2810b57cec5SDimitry Andric MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); 2820b57cec5SDimitry Andric ++Column; 2830b57cec5SDimitry Andric } 2840b57cec5SDimitry Andric // If all rows are terminated early (e.g. by trailing comments), we don't 2850b57cec5SDimitry Andric // need to look further. 2860b57cec5SDimitry Andric if (!HasRowWithSufficientColumns) 2870b57cec5SDimitry Andric break; 2880b57cec5SDimitry Andric Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric for (unsigned i = 0; i < Columns; ++i) 2910b57cec5SDimitry Andric Format.TotalWidth += Format.ColumnSizes[i]; 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric // Don't use this Format, if the difference between the longest and shortest 2940b57cec5SDimitry Andric // element in a column exceeds a threshold to avoid excessive spaces. 2950b57cec5SDimitry Andric if ([&] { 2960b57cec5SDimitry Andric for (unsigned i = 0; i < Columns - 1; ++i) 2970b57cec5SDimitry Andric if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) 2980b57cec5SDimitry Andric return true; 2990b57cec5SDimitry Andric return false; 30081ad6265SDimitry Andric }()) { 3010b57cec5SDimitry Andric continue; 30281ad6265SDimitry Andric } 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric // Ignore layouts that are bound to violate the column limit. 3050b57cec5SDimitry Andric if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) 3060b57cec5SDimitry Andric continue; 3070b57cec5SDimitry Andric 3080b57cec5SDimitry Andric Formats.push_back(Format); 3090b57cec5SDimitry Andric } 3100b57cec5SDimitry Andric } 3110b57cec5SDimitry Andric 3120b57cec5SDimitry Andric const CommaSeparatedList::ColumnFormat * 3130b57cec5SDimitry Andric CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { 3140b57cec5SDimitry Andric const ColumnFormat *BestFormat = nullptr; 31504eeddc0SDimitry Andric for (const ColumnFormat &Format : llvm::reverse(Formats)) { 31604eeddc0SDimitry Andric if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) { 31704eeddc0SDimitry Andric if (BestFormat && Format.LineCount > BestFormat->LineCount) 3180b57cec5SDimitry Andric break; 31904eeddc0SDimitry Andric BestFormat = &Format; 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric } 3220b57cec5SDimitry Andric return BestFormat; 3230b57cec5SDimitry Andric } 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric } // namespace format 3260b57cec5SDimitry Andric } // namespace clang 327