1 //===--- FormatToken.cpp - Format C++ code --------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements specific functions of \c FormatTokens and their 11 /// roles. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "FormatToken.h" 16 #include "ContinuationIndenter.h" 17 18 namespace clang { 19 namespace format { 20 21 const char *getTokenTypeName(TokenType Type) { 22 static const char *const TokNames[] = { 23 #define TYPE(X) #X, 24 LIST_TOKEN_TYPES 25 #undef TYPE 26 nullptr}; 27 28 if (Type < NUM_TOKEN_TYPES) 29 return TokNames[Type]; 30 llvm_unreachable("unknown TokenType"); 31 return nullptr; 32 } 33 34 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove 35 // duplication. 36 bool FormatToken::isSimpleTypeSpecifier() const { 37 switch (Tok.getKind()) { 38 case tok::kw_short: 39 case tok::kw_long: 40 case tok::kw___int64: 41 case tok::kw___int128: 42 case tok::kw_signed: 43 case tok::kw_unsigned: 44 case tok::kw_void: 45 case tok::kw_char: 46 case tok::kw_int: 47 case tok::kw_half: 48 case tok::kw_float: 49 case tok::kw_double: 50 case tok::kw___bf16: 51 case tok::kw__Float16: 52 case tok::kw___float128: 53 case tok::kw___ibm128: 54 case tok::kw_wchar_t: 55 case tok::kw_bool: 56 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: 57 #include "clang/Basic/TransformTypeTraits.def" 58 case tok::annot_typename: 59 case tok::kw_char8_t: 60 case tok::kw_char16_t: 61 case tok::kw_char32_t: 62 case tok::kw_typeof: 63 case tok::kw_decltype: 64 case tok::kw__Atomic: 65 return true; 66 default: 67 return false; 68 } 69 } 70 71 // Sorted common C++ non-keyword types. 72 static SmallVector<StringRef> CppNonKeywordTypes = { 73 "clock_t", "int16_t", "int32_t", "int64_t", "int8_t", 74 "intptr_t", "ptrdiff_t", "size_t", "time_t", "uint16_t", 75 "uint32_t", "uint64_t", "uint8_t", "uintptr_t", 76 }; 77 78 bool FormatToken::isTypeName(bool IsCpp) const { 79 return is(TT_TypeName) || isSimpleTypeSpecifier() || 80 (IsCpp && is(tok::identifier) && 81 std::binary_search(CppNonKeywordTypes.begin(), 82 CppNonKeywordTypes.end(), TokenText)); 83 } 84 85 bool FormatToken::isTypeOrIdentifier(bool IsCpp) const { 86 return isTypeName(IsCpp) || isOneOf(tok::kw_auto, tok::identifier); 87 } 88 89 bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { 90 assert(is(tok::r_brace)); 91 if (!Style.Cpp11BracedListStyle || 92 Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { 93 return false; 94 } 95 const auto *LBrace = MatchingParen; 96 assert(LBrace && LBrace->is(tok::l_brace)); 97 if (LBrace->is(BK_BracedInit)) 98 return true; 99 if (LBrace->Previous && LBrace->Previous->is(tok::equal)) 100 return true; 101 return false; 102 } 103 104 bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const { 105 // C# Does not indent object initialisers as continuations. 106 if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp()) 107 return true; 108 if (is(TT_TemplateString) && opensScope()) 109 return true; 110 return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || 111 (is(tok::l_brace) && 112 (getBlockKind() == BK_Block || is(TT_DictLiteral) || 113 (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || 114 (is(tok::less) && Style.isProto()); 115 } 116 117 TokenRole::~TokenRole() {} 118 119 void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} 120 121 unsigned CommaSeparatedList::formatAfterToken(LineState &State, 122 ContinuationIndenter *Indenter, 123 bool DryRun) { 124 if (!State.NextToken || !State.NextToken->Previous) 125 return 0; 126 127 if (Formats.size() <= 1) 128 return 0; // Handled by formatFromToken (1) or avoid severe penalty (0). 129 130 // Ensure that we start on the opening brace. 131 const FormatToken *LBrace = 132 State.NextToken->Previous->getPreviousNonComment(); 133 if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || 134 LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) || 135 LBrace->Next->is(TT_DesignatedInitializerPeriod)) { 136 return 0; 137 } 138 139 // Calculate the number of code points we have to format this list. As the 140 // first token is already placed, we have to subtract it. 141 unsigned RemainingCodePoints = 142 Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; 143 144 // Find the best ColumnFormat, i.e. the best number of columns to use. 145 const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); 146 147 // If no ColumnFormat can be used, the braced list would generally be 148 // bin-packed. Add a severe penalty to this so that column layouts are 149 // preferred if possible. 150 if (!Format) 151 return 10'000; 152 153 // Format the entire list. 154 unsigned Penalty = 0; 155 unsigned Column = 0; 156 unsigned Item = 0; 157 while (State.NextToken != LBrace->MatchingParen) { 158 bool NewLine = false; 159 unsigned ExtraSpaces = 0; 160 161 // If the previous token was one of our commas, we are now on the next item. 162 if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { 163 if (!State.NextToken->isTrailingComment()) { 164 ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; 165 ++Column; 166 } 167 ++Item; 168 } 169 170 if (Column == Format->Columns || State.NextToken->MustBreakBefore) { 171 Column = 0; 172 NewLine = true; 173 } 174 175 // Place token using the continuation indenter and store the penalty. 176 Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); 177 } 178 return Penalty; 179 } 180 181 unsigned CommaSeparatedList::formatFromToken(LineState &State, 182 ContinuationIndenter *Indenter, 183 bool DryRun) { 184 // Formatting with 1 Column isn't really a column layout, so we don't need the 185 // special logic here. We can just avoid bin packing any of the parameters. 186 if (Formats.size() == 1 || HasNestedBracedList) 187 State.Stack.back().AvoidBinPacking = true; 188 return 0; 189 } 190 191 // Returns the lengths in code points between Begin and End (both included), 192 // assuming that the entire sequence is put on a single line. 193 static unsigned CodePointsBetween(const FormatToken *Begin, 194 const FormatToken *End) { 195 assert(End->TotalLength >= Begin->TotalLength); 196 return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; 197 } 198 199 void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { 200 // FIXME: At some point we might want to do this for other lists, too. 201 if (!Token->MatchingParen || 202 !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) { 203 return; 204 } 205 206 // In C++11 braced list style, we should not format in columns unless they 207 // have many items (20 or more) or we allow bin-packing of function call 208 // arguments. 209 if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && 210 Commas.size() < 19) { 211 return; 212 } 213 214 // Limit column layout for JavaScript array initializers to 20 or more items 215 // for now to introduce it carefully. We can become more aggressive if this 216 // necessary. 217 if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19) 218 return; 219 220 // Column format doesn't really make sense if we don't align after brackets. 221 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) 222 return; 223 224 FormatToken *ItemBegin = Token->Next; 225 while (ItemBegin->isTrailingComment()) 226 ItemBegin = ItemBegin->Next; 227 SmallVector<bool, 8> MustBreakBeforeItem; 228 229 // The lengths of an item if it is put at the end of the line. This includes 230 // trailing comments which are otherwise ignored for column alignment. 231 SmallVector<unsigned, 8> EndOfLineItemLength; 232 MustBreakBeforeItem.reserve(Commas.size() + 1); 233 EndOfLineItemLength.reserve(Commas.size() + 1); 234 ItemLengths.reserve(Commas.size() + 1); 235 236 bool HasSeparatingComment = false; 237 for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { 238 assert(ItemBegin); 239 // Skip comments on their own line. 240 while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { 241 ItemBegin = ItemBegin->Next; 242 HasSeparatingComment = i > 0; 243 } 244 245 MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); 246 if (ItemBegin->is(tok::l_brace)) 247 HasNestedBracedList = true; 248 const FormatToken *ItemEnd = nullptr; 249 if (i == Commas.size()) { 250 ItemEnd = Token->MatchingParen; 251 const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); 252 ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); 253 if (Style.Cpp11BracedListStyle && 254 !ItemEnd->Previous->isTrailingComment()) { 255 // In Cpp11 braced list style, the } and possibly other subsequent 256 // tokens will need to stay on a line with the last element. 257 while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) 258 ItemEnd = ItemEnd->Next; 259 } else { 260 // In other braced lists styles, the "}" can be wrapped to the new line. 261 ItemEnd = Token->MatchingParen->Previous; 262 } 263 } else { 264 ItemEnd = Commas[i]; 265 // The comma is counted as part of the item when calculating the length. 266 ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); 267 268 // Consume trailing comments so the are included in EndOfLineItemLength. 269 if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && 270 ItemEnd->Next->isTrailingComment()) { 271 ItemEnd = ItemEnd->Next; 272 } 273 } 274 EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); 275 // If there is a trailing comma in the list, the next item will start at the 276 // closing brace. Don't create an extra item for this. 277 if (ItemEnd->getNextNonComment() == Token->MatchingParen) 278 break; 279 ItemBegin = ItemEnd->Next; 280 } 281 282 // Don't use column layout for lists with few elements and in presence of 283 // separating comments. 284 if (Commas.size() < 5 || HasSeparatingComment) 285 return; 286 287 if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19) 288 return; 289 290 // We can never place more than ColumnLimit / 3 items in a row (because of the 291 // spaces and the comma). 292 unsigned MaxItems = Style.ColumnLimit / 3; 293 SmallVector<unsigned> MinSizeInColumn; 294 MinSizeInColumn.reserve(MaxItems); 295 for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { 296 ColumnFormat Format; 297 Format.Columns = Columns; 298 Format.ColumnSizes.resize(Columns); 299 MinSizeInColumn.assign(Columns, UINT_MAX); 300 Format.LineCount = 1; 301 bool HasRowWithSufficientColumns = false; 302 unsigned Column = 0; 303 for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { 304 assert(i < MustBreakBeforeItem.size()); 305 if (MustBreakBeforeItem[i] || Column == Columns) { 306 ++Format.LineCount; 307 Column = 0; 308 } 309 if (Column == Columns - 1) 310 HasRowWithSufficientColumns = true; 311 unsigned Length = 312 (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; 313 Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); 314 MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); 315 ++Column; 316 } 317 // If all rows are terminated early (e.g. by trailing comments), we don't 318 // need to look further. 319 if (!HasRowWithSufficientColumns) 320 break; 321 Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. 322 323 for (unsigned i = 0; i < Columns; ++i) 324 Format.TotalWidth += Format.ColumnSizes[i]; 325 326 // Don't use this Format, if the difference between the longest and shortest 327 // element in a column exceeds a threshold to avoid excessive spaces. 328 if ([&] { 329 for (unsigned i = 0; i < Columns - 1; ++i) 330 if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) 331 return true; 332 return false; 333 }()) { 334 continue; 335 } 336 337 // Ignore layouts that are bound to violate the column limit. 338 if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) 339 continue; 340 341 Formats.push_back(Format); 342 } 343 } 344 345 const CommaSeparatedList::ColumnFormat * 346 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { 347 const ColumnFormat *BestFormat = nullptr; 348 for (const ColumnFormat &Format : llvm::reverse(Formats)) { 349 if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) { 350 if (BestFormat && Format.LineCount > BestFormat->LineCount) 351 break; 352 BestFormat = &Format; 353 } 354 } 355 return BestFormat; 356 } 357 358 } // namespace format 359 } // namespace clang 360