xref: /llvm-project/clang/lib/Format/FormatToken.cpp (revision 6f31cf51dfdc2c317ba8149d57d2ffb583403833)
1 //===--- FormatToken.cpp - Format C++ code --------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements specific functions of \c FormatTokens and their
11 /// roles.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "FormatToken.h"
16 #include "ContinuationIndenter.h"
17 
18 namespace clang {
19 namespace format {
20 
21 const char *getTokenTypeName(TokenType Type) {
22   static const char *const TokNames[] = {
23 #define TYPE(X) #X,
24       LIST_TOKEN_TYPES
25 #undef TYPE
26       nullptr};
27 
28   if (Type < NUM_TOKEN_TYPES)
29     return TokNames[Type];
30   llvm_unreachable("unknown TokenType");
31   return nullptr;
32 }
33 
34 // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
35 // duplication.
36 bool FormatToken::isSimpleTypeSpecifier() const {
37   switch (Tok.getKind()) {
38   case tok::kw_short:
39   case tok::kw_long:
40   case tok::kw___int64:
41   case tok::kw___int128:
42   case tok::kw_signed:
43   case tok::kw_unsigned:
44   case tok::kw_void:
45   case tok::kw_char:
46   case tok::kw_int:
47   case tok::kw_half:
48   case tok::kw_float:
49   case tok::kw_double:
50   case tok::kw___bf16:
51   case tok::kw__Float16:
52   case tok::kw___float128:
53   case tok::kw___ibm128:
54   case tok::kw_wchar_t:
55   case tok::kw_bool:
56 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait:
57 #include "clang/Basic/TransformTypeTraits.def"
58   case tok::annot_typename:
59   case tok::kw_char8_t:
60   case tok::kw_char16_t:
61   case tok::kw_char32_t:
62   case tok::kw_typeof:
63   case tok::kw_decltype:
64   case tok::kw__Atomic:
65     return true;
66   default:
67     return false;
68   }
69 }
70 
71 // Sorted common C++ non-keyword types.
72 static SmallVector<StringRef> CppNonKeywordTypes = {
73     "clock_t",  "int16_t",   "int32_t", "int64_t",   "int8_t",
74     "intptr_t", "ptrdiff_t", "size_t",  "time_t",    "uint16_t",
75     "uint32_t", "uint64_t",  "uint8_t", "uintptr_t",
76 };
77 
78 bool FormatToken::isTypeName(bool IsCpp) const {
79   return is(TT_TypeName) || isSimpleTypeSpecifier() ||
80          (IsCpp && is(tok::identifier) &&
81           std::binary_search(CppNonKeywordTypes.begin(),
82                              CppNonKeywordTypes.end(), TokenText));
83 }
84 
85 bool FormatToken::isTypeOrIdentifier(bool IsCpp) const {
86   return isTypeName(IsCpp) || isOneOf(tok::kw_auto, tok::identifier);
87 }
88 
89 bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const {
90   assert(is(tok::r_brace));
91   if (!Style.Cpp11BracedListStyle ||
92       Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) {
93     return false;
94   }
95   const auto *LBrace = MatchingParen;
96   assert(LBrace && LBrace->is(tok::l_brace));
97   if (LBrace->is(BK_BracedInit))
98     return true;
99   if (LBrace->Previous && LBrace->Previous->is(tok::equal))
100     return true;
101   return false;
102 }
103 
104 bool FormatToken::opensBlockOrBlockTypeList(const FormatStyle &Style) const {
105   // C# Does not indent object initialisers as continuations.
106   if (is(tok::l_brace) && getBlockKind() == BK_BracedInit && Style.isCSharp())
107     return true;
108   if (is(TT_TemplateString) && opensScope())
109     return true;
110   return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
111          (is(tok::l_brace) &&
112           (getBlockKind() == BK_Block || is(TT_DictLiteral) ||
113            (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
114          (is(tok::less) && Style.isProto());
115 }
116 
117 TokenRole::~TokenRole() {}
118 
119 void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {}
120 
121 unsigned CommaSeparatedList::formatAfterToken(LineState &State,
122                                               ContinuationIndenter *Indenter,
123                                               bool DryRun) {
124   if (!State.NextToken || !State.NextToken->Previous)
125     return 0;
126 
127   if (Formats.size() <= 1)
128     return 0; // Handled by formatFromToken (1) or avoid severe penalty (0).
129 
130   // Ensure that we start on the opening brace.
131   const FormatToken *LBrace =
132       State.NextToken->Previous->getPreviousNonComment();
133   if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
134       LBrace->is(BK_Block) || LBrace->is(TT_DictLiteral) ||
135       LBrace->Next->is(TT_DesignatedInitializerPeriod)) {
136     return 0;
137   }
138 
139   // Calculate the number of code points we have to format this list. As the
140   // first token is already placed, we have to subtract it.
141   unsigned RemainingCodePoints =
142       Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth;
143 
144   // Find the best ColumnFormat, i.e. the best number of columns to use.
145   const ColumnFormat *Format = getColumnFormat(RemainingCodePoints);
146 
147   // If no ColumnFormat can be used, the braced list would generally be
148   // bin-packed. Add a severe penalty to this so that column layouts are
149   // preferred if possible.
150   if (!Format)
151     return 10'000;
152 
153   // Format the entire list.
154   unsigned Penalty = 0;
155   unsigned Column = 0;
156   unsigned Item = 0;
157   while (State.NextToken != LBrace->MatchingParen) {
158     bool NewLine = false;
159     unsigned ExtraSpaces = 0;
160 
161     // If the previous token was one of our commas, we are now on the next item.
162     if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) {
163       if (!State.NextToken->isTrailingComment()) {
164         ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item];
165         ++Column;
166       }
167       ++Item;
168     }
169 
170     if (Column == Format->Columns || State.NextToken->MustBreakBefore) {
171       Column = 0;
172       NewLine = true;
173     }
174 
175     // Place token using the continuation indenter and store the penalty.
176     Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces);
177   }
178   return Penalty;
179 }
180 
181 unsigned CommaSeparatedList::formatFromToken(LineState &State,
182                                              ContinuationIndenter *Indenter,
183                                              bool DryRun) {
184   // Formatting with 1 Column isn't really a column layout, so we don't need the
185   // special logic here. We can just avoid bin packing any of the parameters.
186   if (Formats.size() == 1 || HasNestedBracedList)
187     State.Stack.back().AvoidBinPacking = true;
188   return 0;
189 }
190 
191 // Returns the lengths in code points between Begin and End (both included),
192 // assuming that the entire sequence is put on a single line.
193 static unsigned CodePointsBetween(const FormatToken *Begin,
194                                   const FormatToken *End) {
195   assert(End->TotalLength >= Begin->TotalLength);
196   return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth;
197 }
198 
199 void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
200   // FIXME: At some point we might want to do this for other lists, too.
201   if (!Token->MatchingParen ||
202       !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) {
203     return;
204   }
205 
206   // In C++11 braced list style, we should not format in columns unless they
207   // have many items (20 or more) or we allow bin-packing of function call
208   // arguments.
209   if (Style.Cpp11BracedListStyle && !Style.BinPackArguments &&
210       Commas.size() < 19) {
211     return;
212   }
213 
214   // Limit column layout for JavaScript array initializers to 20 or more items
215   // for now to introduce it carefully. We can become more aggressive if this
216   // necessary.
217   if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19)
218     return;
219 
220   // Column format doesn't really make sense if we don't align after brackets.
221   if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign)
222     return;
223 
224   FormatToken *ItemBegin = Token->Next;
225   while (ItemBegin->isTrailingComment())
226     ItemBegin = ItemBegin->Next;
227   SmallVector<bool, 8> MustBreakBeforeItem;
228 
229   // The lengths of an item if it is put at the end of the line. This includes
230   // trailing comments which are otherwise ignored for column alignment.
231   SmallVector<unsigned, 8> EndOfLineItemLength;
232   MustBreakBeforeItem.reserve(Commas.size() + 1);
233   EndOfLineItemLength.reserve(Commas.size() + 1);
234   ItemLengths.reserve(Commas.size() + 1);
235 
236   bool HasSeparatingComment = false;
237   for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
238     assert(ItemBegin);
239     // Skip comments on their own line.
240     while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
241       ItemBegin = ItemBegin->Next;
242       HasSeparatingComment = i > 0;
243     }
244 
245     MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore);
246     if (ItemBegin->is(tok::l_brace))
247       HasNestedBracedList = true;
248     const FormatToken *ItemEnd = nullptr;
249     if (i == Commas.size()) {
250       ItemEnd = Token->MatchingParen;
251       const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment();
252       ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd));
253       if (Style.Cpp11BracedListStyle &&
254           !ItemEnd->Previous->isTrailingComment()) {
255         // In Cpp11 braced list style, the } and possibly other subsequent
256         // tokens will need to stay on a line with the last element.
257         while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore)
258           ItemEnd = ItemEnd->Next;
259       } else {
260         // In other braced lists styles, the "}" can be wrapped to the new line.
261         ItemEnd = Token->MatchingParen->Previous;
262       }
263     } else {
264       ItemEnd = Commas[i];
265       // The comma is counted as part of the item when calculating the length.
266       ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd));
267 
268       // Consume trailing comments so the are included in EndOfLineItemLength.
269       if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline &&
270           ItemEnd->Next->isTrailingComment()) {
271         ItemEnd = ItemEnd->Next;
272       }
273     }
274     EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd));
275     // If there is a trailing comma in the list, the next item will start at the
276     // closing brace. Don't create an extra item for this.
277     if (ItemEnd->getNextNonComment() == Token->MatchingParen)
278       break;
279     ItemBegin = ItemEnd->Next;
280   }
281 
282   // Don't use column layout for lists with few elements and in presence of
283   // separating comments.
284   if (Commas.size() < 5 || HasSeparatingComment)
285     return;
286 
287   if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19)
288     return;
289 
290   // We can never place more than ColumnLimit / 3 items in a row (because of the
291   // spaces and the comma).
292   unsigned MaxItems = Style.ColumnLimit / 3;
293   SmallVector<unsigned> MinSizeInColumn;
294   MinSizeInColumn.reserve(MaxItems);
295   for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) {
296     ColumnFormat Format;
297     Format.Columns = Columns;
298     Format.ColumnSizes.resize(Columns);
299     MinSizeInColumn.assign(Columns, UINT_MAX);
300     Format.LineCount = 1;
301     bool HasRowWithSufficientColumns = false;
302     unsigned Column = 0;
303     for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) {
304       assert(i < MustBreakBeforeItem.size());
305       if (MustBreakBeforeItem[i] || Column == Columns) {
306         ++Format.LineCount;
307         Column = 0;
308       }
309       if (Column == Columns - 1)
310         HasRowWithSufficientColumns = true;
311       unsigned Length =
312           (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i];
313       Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length);
314       MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length);
315       ++Column;
316     }
317     // If all rows are terminated early (e.g. by trailing comments), we don't
318     // need to look further.
319     if (!HasRowWithSufficientColumns)
320       break;
321     Format.TotalWidth = Columns - 1; // Width of the N-1 spaces.
322 
323     for (unsigned i = 0; i < Columns; ++i)
324       Format.TotalWidth += Format.ColumnSizes[i];
325 
326     // Don't use this Format, if the difference between the longest and shortest
327     // element in a column exceeds a threshold to avoid excessive spaces.
328     if ([&] {
329           for (unsigned i = 0; i < Columns - 1; ++i)
330             if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10)
331               return true;
332           return false;
333         }()) {
334       continue;
335     }
336 
337     // Ignore layouts that are bound to violate the column limit.
338     if (Format.TotalWidth > Style.ColumnLimit && Columns > 1)
339       continue;
340 
341     Formats.push_back(Format);
342   }
343 }
344 
345 const CommaSeparatedList::ColumnFormat *
346 CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const {
347   const ColumnFormat *BestFormat = nullptr;
348   for (const ColumnFormat &Format : llvm::reverse(Formats)) {
349     if (Format.TotalWidth <= RemainingCharacters || Format.Columns == 1) {
350       if (BestFormat && Format.LineCount > BestFormat->LineCount)
351         break;
352       BestFormat = &Format;
353     }
354   }
355   return BestFormat;
356 }
357 
358 } // namespace format
359 } // namespace clang
360