xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/BreakableToken.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===--- BreakableToken.cpp - Format C++ code -----------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric ///
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// Contains implementation of BreakableToken class and classes derived
110b57cec5SDimitry Andric /// from it.
120b57cec5SDimitry Andric ///
130b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "BreakableToken.h"
160b57cec5SDimitry Andric #include "ContinuationIndenter.h"
170b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h"
180b57cec5SDimitry Andric #include "clang/Format/Format.h"
190b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
200b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
210b57cec5SDimitry Andric #include <algorithm>
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric #define DEBUG_TYPE "format-token-breaker"
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric namespace clang {
260b57cec5SDimitry Andric namespace format {
270b57cec5SDimitry Andric 
28e8d8bef9SDimitry Andric static constexpr StringRef Blanks = " \t\v\f\r";
/// Returns true if \p C is a space, horizontal tab, vertical tab, form feed or
/// carriage return. A line feed is not considered blank.
static bool IsBlank(char C) {
  return C == ' ' || C == '\t' || C == '\v' || C == '\f' || C == '\r';
}
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric static StringRef getLineCommentIndentPrefix(StringRef Comment,
430b57cec5SDimitry Andric                                             const FormatStyle &Style) {
44e8d8bef9SDimitry Andric   static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
45e8d8bef9SDimitry Andric                                                       "//!",  "//:",  "//"};
46e8d8bef9SDimitry Andric   static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
47e8d8bef9SDimitry Andric                                                          "//", "#"};
48e8d8bef9SDimitry Andric   ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
490b57cec5SDimitry Andric   if (Style.Language == FormatStyle::LK_TextProto)
500b57cec5SDimitry Andric     KnownPrefixes = KnownTextProtoPrefixes;
510b57cec5SDimitry Andric 
52bdd1243dSDimitry Andric   assert(
53bdd1243dSDimitry Andric       llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
54e8d8bef9SDimitry Andric         return Lhs.size() > Rhs.size();
55e8d8bef9SDimitry Andric       }));
56e8d8bef9SDimitry Andric 
570b57cec5SDimitry Andric   for (StringRef KnownPrefix : KnownPrefixes) {
585f757f3fSDimitry Andric     if (Comment.starts_with(KnownPrefix)) {
59e8d8bef9SDimitry Andric       const auto PrefixLength =
60e8d8bef9SDimitry Andric           Comment.find_first_not_of(' ', KnownPrefix.size());
61e8d8bef9SDimitry Andric       return Comment.substr(0, PrefixLength);
620b57cec5SDimitry Andric     }
630b57cec5SDimitry Andric   }
64e8d8bef9SDimitry Andric   return {};
650b57cec5SDimitry Andric }
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric static BreakableToken::Split
680b57cec5SDimitry Andric getCommentSplit(StringRef Text, unsigned ContentStartColumn,
690b57cec5SDimitry Andric                 unsigned ColumnLimit, unsigned TabWidth,
700b57cec5SDimitry Andric                 encoding::Encoding Encoding, const FormatStyle &Style,
710b57cec5SDimitry Andric                 bool DecorationEndsWithStar = false) {
720b57cec5SDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
730b57cec5SDimitry Andric                           << "\", Column limit: " << ColumnLimit
740b57cec5SDimitry Andric                           << ", Content start: " << ContentStartColumn << "\n");
750b57cec5SDimitry Andric   if (ColumnLimit <= ContentStartColumn + 1)
760b57cec5SDimitry Andric     return BreakableToken::Split(StringRef::npos, 0);
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric   unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
790b57cec5SDimitry Andric   unsigned MaxSplitBytes = 0;
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric   for (unsigned NumChars = 0;
820b57cec5SDimitry Andric        NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
830b57cec5SDimitry Andric     unsigned BytesInChar =
840b57cec5SDimitry Andric         encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
8506c3fb27SDimitry Andric     NumChars += encoding::columnWidthWithTabs(
8606c3fb27SDimitry Andric         Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
8706c3fb27SDimitry Andric         TabWidth, Encoding);
880b57cec5SDimitry Andric     MaxSplitBytes += BytesInChar;
890b57cec5SDimitry Andric   }
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   // In JavaScript, some @tags can be followed by {, and machinery that parses
920b57cec5SDimitry Andric   // these comments will fail to understand the comment if followed by a line
930b57cec5SDimitry Andric   // break. So avoid ever breaking before a {.
940eae32dcSDimitry Andric   if (Style.isJavaScript()) {
95e8d8bef9SDimitry Andric     StringRef::size_type SpaceOffset =
96e8d8bef9SDimitry Andric         Text.find_first_of(Blanks, MaxSplitBytes);
97e8d8bef9SDimitry Andric     if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
98e8d8bef9SDimitry Andric         Text[SpaceOffset + 1] == '{') {
99e8d8bef9SDimitry Andric       MaxSplitBytes = SpaceOffset + 1;
100e8d8bef9SDimitry Andric     }
101e8d8bef9SDimitry Andric   }
102e8d8bef9SDimitry Andric 
103e8d8bef9SDimitry Andric   StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
104e8d8bef9SDimitry Andric 
105e8d8bef9SDimitry Andric   static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
106e8d8bef9SDimitry Andric   // Some spaces are unacceptable to break on, rewind past them.
107e8d8bef9SDimitry Andric   while (SpaceOffset != StringRef::npos) {
108e8d8bef9SDimitry Andric     // If a line-comment ends with `\`, the next line continues the comment,
109e8d8bef9SDimitry Andric     // whether or not it starts with `//`. This is confusing and triggers
110e8d8bef9SDimitry Andric     // -Wcomment.
111e8d8bef9SDimitry Andric     // Avoid introducing multiline comments by not allowing a break right
112e8d8bef9SDimitry Andric     // after '\'.
113e8d8bef9SDimitry Andric     if (Style.isCpp()) {
114e8d8bef9SDimitry Andric       StringRef::size_type LastNonBlank =
115e8d8bef9SDimitry Andric           Text.find_last_not_of(Blanks, SpaceOffset);
116e8d8bef9SDimitry Andric       if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
117e8d8bef9SDimitry Andric         SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
118e8d8bef9SDimitry Andric         continue;
119e8d8bef9SDimitry Andric       }
120e8d8bef9SDimitry Andric     }
121e8d8bef9SDimitry Andric 
122e8d8bef9SDimitry Andric     // Do not split before a number followed by a dot: this would be interpreted
123e8d8bef9SDimitry Andric     // as a numbered list, which would prevent re-flowing in subsequent passes.
124e8d8bef9SDimitry Andric     if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
1250b57cec5SDimitry Andric       SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
126e8d8bef9SDimitry Andric       continue;
127e8d8bef9SDimitry Andric     }
128e8d8bef9SDimitry Andric 
129e8d8bef9SDimitry Andric     // Avoid ever breaking before a @tag or a { in JavaScript.
1300eae32dcSDimitry Andric     if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
131e8d8bef9SDimitry Andric         (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
132e8d8bef9SDimitry Andric       SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
133e8d8bef9SDimitry Andric       continue;
134e8d8bef9SDimitry Andric     }
135e8d8bef9SDimitry Andric 
1360b57cec5SDimitry Andric     break;
1370b57cec5SDimitry Andric   }
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric   if (SpaceOffset == StringRef::npos ||
1400b57cec5SDimitry Andric       // Don't break at leading whitespace.
1410b57cec5SDimitry Andric       Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
1420b57cec5SDimitry Andric     // Make sure that we don't break at leading whitespace that
1430b57cec5SDimitry Andric     // reaches past MaxSplit.
1440b57cec5SDimitry Andric     StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
14581ad6265SDimitry Andric     if (FirstNonWhitespace == StringRef::npos) {
1460b57cec5SDimitry Andric       // If the comment is only whitespace, we cannot split.
1470b57cec5SDimitry Andric       return BreakableToken::Split(StringRef::npos, 0);
14881ad6265SDimitry Andric     }
1490b57cec5SDimitry Andric     SpaceOffset = Text.find_first_of(
1500b57cec5SDimitry Andric         Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
1510b57cec5SDimitry Andric   }
1520b57cec5SDimitry Andric   if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
1530b57cec5SDimitry Andric     // adaptStartOfLine will break after lines starting with /** if the comment
1540b57cec5SDimitry Andric     // is broken anywhere. Avoid emitting this break twice here.
1550b57cec5SDimitry Andric     // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
1560b57cec5SDimitry Andric     // insert a break after /**, so this code must not insert the same break.
1570b57cec5SDimitry Andric     if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
1580b57cec5SDimitry Andric       return BreakableToken::Split(StringRef::npos, 0);
1590b57cec5SDimitry Andric     StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
1600b57cec5SDimitry Andric     StringRef AfterCut = Text.substr(SpaceOffset);
1610b57cec5SDimitry Andric     // Don't trim the leading blanks if it would create a */ after the break.
1620b57cec5SDimitry Andric     if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')
1630b57cec5SDimitry Andric       AfterCut = AfterCut.ltrim(Blanks);
1640b57cec5SDimitry Andric     return BreakableToken::Split(BeforeCut.size(),
1650b57cec5SDimitry Andric                                  AfterCut.begin() - BeforeCut.end());
1660b57cec5SDimitry Andric   }
1670b57cec5SDimitry Andric   return BreakableToken::Split(StringRef::npos, 0);
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric 
1700b57cec5SDimitry Andric static BreakableToken::Split
1710b57cec5SDimitry Andric getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
1720b57cec5SDimitry Andric                unsigned TabWidth, encoding::Encoding Encoding) {
1730b57cec5SDimitry Andric   // FIXME: Reduce unit test case.
1740b57cec5SDimitry Andric   if (Text.empty())
1750b57cec5SDimitry Andric     return BreakableToken::Split(StringRef::npos, 0);
1760b57cec5SDimitry Andric   if (ColumnLimit <= UsedColumns)
1770b57cec5SDimitry Andric     return BreakableToken::Split(StringRef::npos, 0);
1780b57cec5SDimitry Andric   unsigned MaxSplit = ColumnLimit - UsedColumns;
1790b57cec5SDimitry Andric   StringRef::size_type SpaceOffset = 0;
1800b57cec5SDimitry Andric   StringRef::size_type SlashOffset = 0;
1810b57cec5SDimitry Andric   StringRef::size_type WordStartOffset = 0;
1820b57cec5SDimitry Andric   StringRef::size_type SplitPoint = 0;
1830b57cec5SDimitry Andric   for (unsigned Chars = 0;;) {
1840b57cec5SDimitry Andric     unsigned Advance;
1850b57cec5SDimitry Andric     if (Text[0] == '\\') {
1860b57cec5SDimitry Andric       Advance = encoding::getEscapeSequenceLength(Text);
1870b57cec5SDimitry Andric       Chars += Advance;
1880b57cec5SDimitry Andric     } else {
1890b57cec5SDimitry Andric       Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
1900b57cec5SDimitry Andric       Chars += encoding::columnWidthWithTabs(
1910b57cec5SDimitry Andric           Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
1920b57cec5SDimitry Andric     }
1930b57cec5SDimitry Andric 
1940b57cec5SDimitry Andric     if (Chars > MaxSplit || Text.size() <= Advance)
1950b57cec5SDimitry Andric       break;
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric     if (IsBlank(Text[0]))
1980b57cec5SDimitry Andric       SpaceOffset = SplitPoint;
1990b57cec5SDimitry Andric     if (Text[0] == '/')
2000b57cec5SDimitry Andric       SlashOffset = SplitPoint;
2010b57cec5SDimitry Andric     if (Advance == 1 && !isAlphanumeric(Text[0]))
2020b57cec5SDimitry Andric       WordStartOffset = SplitPoint;
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric     SplitPoint += Advance;
2050b57cec5SDimitry Andric     Text = Text.substr(Advance);
2060b57cec5SDimitry Andric   }
2070b57cec5SDimitry Andric 
2080b57cec5SDimitry Andric   if (SpaceOffset != 0)
2090b57cec5SDimitry Andric     return BreakableToken::Split(SpaceOffset + 1, 0);
2100b57cec5SDimitry Andric   if (SlashOffset != 0)
2110b57cec5SDimitry Andric     return BreakableToken::Split(SlashOffset + 1, 0);
2120b57cec5SDimitry Andric   if (WordStartOffset != 0)
2130b57cec5SDimitry Andric     return BreakableToken::Split(WordStartOffset + 1, 0);
2140b57cec5SDimitry Andric   if (SplitPoint != 0)
2150b57cec5SDimitry Andric     return BreakableToken::Split(SplitPoint, 0);
2160b57cec5SDimitry Andric   return BreakableToken::Split(StringRef::npos, 0);
2170b57cec5SDimitry Andric }
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric bool switchesFormatting(const FormatToken &Token) {
2200b57cec5SDimitry Andric   assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
2210b57cec5SDimitry Andric          "formatting regions are switched by comment tokens");
2220b57cec5SDimitry Andric   StringRef Content = Token.TokenText.substr(2).ltrim();
2235f757f3fSDimitry Andric   return Content.starts_with("clang-format on") ||
2245f757f3fSDimitry Andric          Content.starts_with("clang-format off");
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric unsigned
2280b57cec5SDimitry Andric BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
2290b57cec5SDimitry Andric                                           Split Split) const {
2300b57cec5SDimitry Andric   // Example: consider the content
2310b57cec5SDimitry Andric   // lala  lala
2320b57cec5SDimitry Andric   // - RemainingTokenColumns is the original number of columns, 10;
2330b57cec5SDimitry Andric   // - Split is (4, 2), denoting the two spaces between the two words;
2340b57cec5SDimitry Andric   //
2350b57cec5SDimitry Andric   // We compute the number of columns when the split is compressed into a single
2360b57cec5SDimitry Andric   // space, like:
2370b57cec5SDimitry Andric   // lala lala
2380b57cec5SDimitry Andric   //
2390b57cec5SDimitry Andric   // FIXME: Correctly measure the length of whitespace in Split.second so it
2400b57cec5SDimitry Andric   // works with tabs.
2410b57cec5SDimitry Andric   return RemainingTokenColumns + 1 - Split.second;
2420b57cec5SDimitry Andric }
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric unsigned BreakableStringLiteral::getLineCount() const { return 1; }
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
2470b57cec5SDimitry Andric                                                 unsigned Offset,
2480b57cec5SDimitry Andric                                                 StringRef::size_type Length,
2490b57cec5SDimitry Andric                                                 unsigned StartColumn) const {
2500b57cec5SDimitry Andric   llvm_unreachable("Getting the length of a part of the string literal "
2510b57cec5SDimitry Andric                    "indicates that the code tries to reflow it.");
2520b57cec5SDimitry Andric }
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric unsigned
2550b57cec5SDimitry Andric BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
2560b57cec5SDimitry Andric                                            unsigned StartColumn) const {
2570b57cec5SDimitry Andric   return UnbreakableTailLength + Postfix.size() +
2581fd87a68SDimitry Andric          encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn,
2591fd87a68SDimitry Andric                                        Style.TabWidth, Encoding);
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric 
2620b57cec5SDimitry Andric unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
2630b57cec5SDimitry Andric                                                        bool Break) const {
2640b57cec5SDimitry Andric   return StartColumn + Prefix.size();
2650b57cec5SDimitry Andric }
2660b57cec5SDimitry Andric 
2670b57cec5SDimitry Andric BreakableStringLiteral::BreakableStringLiteral(
2680b57cec5SDimitry Andric     const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
2690b57cec5SDimitry Andric     StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
2700b57cec5SDimitry Andric     encoding::Encoding Encoding, const FormatStyle &Style)
2710b57cec5SDimitry Andric     : BreakableToken(Tok, InPPDirective, Encoding, Style),
2720b57cec5SDimitry Andric       StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
2730b57cec5SDimitry Andric       UnbreakableTailLength(UnbreakableTailLength) {
2745f757f3fSDimitry Andric   assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
2750b57cec5SDimitry Andric   Line = Tok.TokenText.substr(
2760b57cec5SDimitry Andric       Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
2770b57cec5SDimitry Andric }
2780b57cec5SDimitry Andric 
2790b57cec5SDimitry Andric BreakableToken::Split BreakableStringLiteral::getSplit(
2800b57cec5SDimitry Andric     unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
281480093f4SDimitry Andric     unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
2820b57cec5SDimitry Andric   return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
2830b57cec5SDimitry Andric                         ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
2840b57cec5SDimitry Andric }
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric void BreakableStringLiteral::insertBreak(unsigned LineIndex,
2870b57cec5SDimitry Andric                                          unsigned TailOffset, Split Split,
2880b57cec5SDimitry Andric                                          unsigned ContentIndent,
2890b57cec5SDimitry Andric                                          WhitespaceManager &Whitespaces) const {
2900b57cec5SDimitry Andric   Whitespaces.replaceWhitespaceInToken(
2910b57cec5SDimitry Andric       Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
2920b57cec5SDimitry Andric       Prefix, InPPDirective, 1, StartColumn);
2930b57cec5SDimitry Andric }
2940b57cec5SDimitry Andric 
2955f757f3fSDimitry Andric BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
2965f757f3fSDimitry Andric     const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
2975f757f3fSDimitry Andric     unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
2985f757f3fSDimitry Andric     encoding::Encoding Encoding, const FormatStyle &Style)
2995f757f3fSDimitry Andric     : BreakableStringLiteral(
3005f757f3fSDimitry Andric           Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
3015f757f3fSDimitry Andric                             : QuoteStyle == AtDoubleQuotes        ? "@\""
3025f757f3fSDimitry Andric                                                                   : "\"",
3035f757f3fSDimitry Andric           /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
3045f757f3fSDimitry Andric           UnbreakableTailLength, InPPDirective, Encoding, Style),
3055f757f3fSDimitry Andric       BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
3065f757f3fSDimitry Andric       QuoteStyle(QuoteStyle) {
3075f757f3fSDimitry Andric   // Find the replacement text for inserting braces and quotes and line breaks.
3085f757f3fSDimitry Andric   // We don't create an allocated string concatenated from parts here because it
3095f757f3fSDimitry Andric   // has to outlive the BreakableStringliteral object.  The brace replacements
3105f757f3fSDimitry Andric   // include a quote so that WhitespaceManager can tell it apart from whitespace
3115f757f3fSDimitry Andric   // replacements between the string and surrounding tokens.
3125f757f3fSDimitry Andric 
3135f757f3fSDimitry Andric   // The option is not implemented in JavaScript.
3145f757f3fSDimitry Andric   bool SignOnNewLine =
3155f757f3fSDimitry Andric       !Style.isJavaScript() &&
3165f757f3fSDimitry Andric       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
3175f757f3fSDimitry Andric 
3185f757f3fSDimitry Andric   if (Style.isVerilog()) {
3195f757f3fSDimitry Andric     // In Verilog, all strings are quoted by double quotes, joined by commas,
3205f757f3fSDimitry Andric     // and wrapped in braces.  The comma is always before the newline.
3215f757f3fSDimitry Andric     assert(QuoteStyle == DoubleQuotes);
3225f757f3fSDimitry Andric     LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
3235f757f3fSDimitry Andric     RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
3245f757f3fSDimitry Andric     Postfix = "\",";
3255f757f3fSDimitry Andric     Prefix = "\"";
3265f757f3fSDimitry Andric   } else {
3275f757f3fSDimitry Andric     // The plus sign may be on either line.  And also C# and JavaScript have
3285f757f3fSDimitry Andric     // several quoting styles.
3295f757f3fSDimitry Andric     if (QuoteStyle == SingleQuotes) {
3305f757f3fSDimitry Andric       LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
3315f757f3fSDimitry Andric       RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
3325f757f3fSDimitry Andric       Postfix = SignOnNewLine ? "'" : "' +";
3335f757f3fSDimitry Andric       Prefix = SignOnNewLine ? "+ '" : "'";
3345f757f3fSDimitry Andric     } else {
3355f757f3fSDimitry Andric       if (QuoteStyle == AtDoubleQuotes) {
3365f757f3fSDimitry Andric         LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
3375f757f3fSDimitry Andric         Prefix = SignOnNewLine ? "+ @\"" : "@\"";
3385f757f3fSDimitry Andric       } else {
3395f757f3fSDimitry Andric         LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
3405f757f3fSDimitry Andric         Prefix = SignOnNewLine ? "+ \"" : "\"";
3415f757f3fSDimitry Andric       }
3425f757f3fSDimitry Andric       RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
3435f757f3fSDimitry Andric       Postfix = SignOnNewLine ? "\"" : "\" +";
3445f757f3fSDimitry Andric     }
3455f757f3fSDimitry Andric   }
3465f757f3fSDimitry Andric 
3475f757f3fSDimitry Andric   // Following lines are indented by the width of the brace and space if any.
3485f757f3fSDimitry Andric   ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0;
3495f757f3fSDimitry Andric   // The plus sign may need to be unindented depending on the style.
3505f757f3fSDimitry Andric   // FIXME: Add support for DontAlign.
3515f757f3fSDimitry Andric   if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
3525f757f3fSDimitry Andric       Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
3535f757f3fSDimitry Andric     ContinuationIndent -= 2;
3545f757f3fSDimitry Andric   }
3555f757f3fSDimitry Andric }
3565f757f3fSDimitry Andric 
3575f757f3fSDimitry Andric unsigned BreakableStringLiteralUsingOperators::getRemainingLength(
3585f757f3fSDimitry Andric     unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
3595f757f3fSDimitry Andric   return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
3605f757f3fSDimitry Andric          encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn,
3615f757f3fSDimitry Andric                                        Style.TabWidth, Encoding);
3625f757f3fSDimitry Andric }
3635f757f3fSDimitry Andric 
3645f757f3fSDimitry Andric unsigned
3655f757f3fSDimitry Andric BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
3665f757f3fSDimitry Andric                                                             bool Break) const {
3675f757f3fSDimitry Andric   return std::max(
3685f757f3fSDimitry Andric       0,
3695f757f3fSDimitry Andric       static_cast<int>(StartColumn) +
3705f757f3fSDimitry Andric           (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
3715f757f3fSDimitry Andric                  : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
3725f757f3fSDimitry Andric                                  : 0) +
3735f757f3fSDimitry Andric                        (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
3745f757f3fSDimitry Andric }
3755f757f3fSDimitry Andric 
3765f757f3fSDimitry Andric void BreakableStringLiteralUsingOperators::insertBreak(
3775f757f3fSDimitry Andric     unsigned LineIndex, unsigned TailOffset, Split Split,
3785f757f3fSDimitry Andric     unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
3795f757f3fSDimitry Andric   Whitespaces.replaceWhitespaceInToken(
3805f757f3fSDimitry Andric       Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
3815f757f3fSDimitry Andric                Split.first,
3825f757f3fSDimitry Andric       /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
3835f757f3fSDimitry Andric       /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
3845f757f3fSDimitry Andric       /*Spaces=*/
3855f757f3fSDimitry Andric       std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
3865f757f3fSDimitry Andric }
3875f757f3fSDimitry Andric 
3885f757f3fSDimitry Andric void BreakableStringLiteralUsingOperators::updateAfterBroken(
3895f757f3fSDimitry Andric     WhitespaceManager &Whitespaces) const {
3905f757f3fSDimitry Andric   // Add the braces required for breaking the token if they are needed.
3915f757f3fSDimitry Andric   if (!BracesNeeded)
3925f757f3fSDimitry Andric     return;
3935f757f3fSDimitry Andric 
3945f757f3fSDimitry Andric   // To add a brace or parenthesis, we replace the quote (or the at sign) with a
3955f757f3fSDimitry Andric   // brace and another quote.  This is because the rest of the program requires
3965f757f3fSDimitry Andric   // one replacement for each source range.  If we replace the empty strings
3975f757f3fSDimitry Andric   // around the string, it may conflict with whitespace replacements between the
3985f757f3fSDimitry Andric   // string and adjacent tokens.
3995f757f3fSDimitry Andric   Whitespaces.replaceWhitespaceInToken(
4005f757f3fSDimitry Andric       Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
4015f757f3fSDimitry Andric       /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
4025f757f3fSDimitry Andric       /*Spaces=*/0);
4035f757f3fSDimitry Andric   Whitespaces.replaceWhitespaceInToken(
4045f757f3fSDimitry Andric       Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
4055f757f3fSDimitry Andric       /*PreviousPostfix=*/RightBraceQuote,
4065f757f3fSDimitry Andric       /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
4075f757f3fSDimitry Andric }
4085f757f3fSDimitry Andric 
4090b57cec5SDimitry Andric BreakableComment::BreakableComment(const FormatToken &Token,
4100b57cec5SDimitry Andric                                    unsigned StartColumn, bool InPPDirective,
4110b57cec5SDimitry Andric                                    encoding::Encoding Encoding,
4120b57cec5SDimitry Andric                                    const FormatStyle &Style)
4130b57cec5SDimitry Andric     : BreakableToken(Token, InPPDirective, Encoding, Style),
4140b57cec5SDimitry Andric       StartColumn(StartColumn) {}
4150b57cec5SDimitry Andric 
4160b57cec5SDimitry Andric unsigned BreakableComment::getLineCount() const { return Lines.size(); }
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric BreakableToken::Split
4190b57cec5SDimitry Andric BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
4200b57cec5SDimitry Andric                            unsigned ColumnLimit, unsigned ContentStartColumn,
421480093f4SDimitry Andric                            const llvm::Regex &CommentPragmasRegex) const {
4220b57cec5SDimitry Andric   // Don't break lines matching the comment pragmas regex.
4230b57cec5SDimitry Andric   if (CommentPragmasRegex.match(Content[LineIndex]))
4240b57cec5SDimitry Andric     return Split(StringRef::npos, 0);
4250b57cec5SDimitry Andric   return getCommentSplit(Content[LineIndex].substr(TailOffset),
4260b57cec5SDimitry Andric                          ContentStartColumn, ColumnLimit, Style.TabWidth,
4270b57cec5SDimitry Andric                          Encoding, Style);
4280b57cec5SDimitry Andric }
4290b57cec5SDimitry Andric 
4300b57cec5SDimitry Andric void BreakableComment::compressWhitespace(
4310b57cec5SDimitry Andric     unsigned LineIndex, unsigned TailOffset, Split Split,
4320b57cec5SDimitry Andric     WhitespaceManager &Whitespaces) const {
4330b57cec5SDimitry Andric   StringRef Text = Content[LineIndex].substr(TailOffset);
4340b57cec5SDimitry Andric   // Text is relative to the content line, but Whitespaces operates relative to
4350b57cec5SDimitry Andric   // the start of the corresponding token, so compute the start of the Split
4360b57cec5SDimitry Andric   // that needs to be compressed into a single space relative to the start of
4370b57cec5SDimitry Andric   // its token.
4380b57cec5SDimitry Andric   unsigned BreakOffsetInToken =
4390b57cec5SDimitry Andric       Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
4400b57cec5SDimitry Andric   unsigned CharsToRemove = Split.second;
4410b57cec5SDimitry Andric   Whitespaces.replaceWhitespaceInToken(
4420b57cec5SDimitry Andric       tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
4430b57cec5SDimitry Andric       /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
4440b57cec5SDimitry Andric }
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
4470b57cec5SDimitry Andric   return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
4480b57cec5SDimitry Andric }
4490b57cec5SDimitry Andric 
4500b57cec5SDimitry Andric static bool mayReflowContent(StringRef Content) {
4510b57cec5SDimitry Andric   Content = Content.trim(Blanks);
452*0fca6ea1SDimitry Andric   // Lines starting with '@' or '\' commonly have special meaning.
4530b57cec5SDimitry Andric   // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
4540b57cec5SDimitry Andric   bool hasSpecialMeaningPrefix = false;
4550b57cec5SDimitry Andric   for (StringRef Prefix :
456*0fca6ea1SDimitry Andric        {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
4575f757f3fSDimitry Andric     if (Content.starts_with(Prefix)) {
4580b57cec5SDimitry Andric       hasSpecialMeaningPrefix = true;
4590b57cec5SDimitry Andric       break;
4600b57cec5SDimitry Andric     }
4610b57cec5SDimitry Andric   }
4620b57cec5SDimitry Andric 
4630b57cec5SDimitry Andric   // Numbered lists may also start with a number followed by '.'
4640b57cec5SDimitry Andric   // To avoid issues if a line starts with a number which is actually the end
4650b57cec5SDimitry Andric   // of a previous line, we only consider numbers with up to 2 digits.
466480093f4SDimitry Andric   static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
4670b57cec5SDimitry Andric   hasSpecialMeaningPrefix =
468480093f4SDimitry Andric       hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric   // Simple heuristic for what to reflow: content should contain at least two
4710b57cec5SDimitry Andric   // characters and either the first or second character must be
4720b57cec5SDimitry Andric   // non-punctuation.
4730b57cec5SDimitry Andric   return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
4745f757f3fSDimitry Andric          !Content.ends_with("\\") &&
4750b57cec5SDimitry Andric          // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
4760b57cec5SDimitry Andric          // true, then the first code point must be 1 byte long.
4770b57cec5SDimitry Andric          (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
4780b57cec5SDimitry Andric }
4790b57cec5SDimitry Andric 
4800b57cec5SDimitry Andric BreakableBlockComment::BreakableBlockComment(
4810b57cec5SDimitry Andric     const FormatToken &Token, unsigned StartColumn,
4820b57cec5SDimitry Andric     unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
4830b57cec5SDimitry Andric     encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
4840b57cec5SDimitry Andric     : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
4850b57cec5SDimitry Andric       DelimitersOnNewline(false),
4860b57cec5SDimitry Andric       UnbreakableTailLength(Token.UnbreakableTailLength) {
4870b57cec5SDimitry Andric   assert(Tok.is(TT_BlockComment) &&
4880b57cec5SDimitry Andric          "block comment section must start with a block comment");
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric   StringRef TokenText(Tok.TokenText);
4915f757f3fSDimitry Andric   assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
492a7dea167SDimitry Andric   TokenText.substr(2, TokenText.size() - 4)
493a7dea167SDimitry Andric       .split(Lines, UseCRLF ? "\r\n" : "\n");
4940b57cec5SDimitry Andric 
4950b57cec5SDimitry Andric   int IndentDelta = StartColumn - OriginalStartColumn;
4960b57cec5SDimitry Andric   Content.resize(Lines.size());
4970b57cec5SDimitry Andric   Content[0] = Lines[0];
4980b57cec5SDimitry Andric   ContentColumn.resize(Lines.size());
4990b57cec5SDimitry Andric   // Account for the initial '/*'.
5000b57cec5SDimitry Andric   ContentColumn[0] = StartColumn + 2;
5010b57cec5SDimitry Andric   Tokens.resize(Lines.size());
5020b57cec5SDimitry Andric   for (size_t i = 1; i < Lines.size(); ++i)
5030b57cec5SDimitry Andric     adjustWhitespace(i, IndentDelta);
5040b57cec5SDimitry Andric 
5050b57cec5SDimitry Andric   // Align decorations with the column of the star on the first line,
5060b57cec5SDimitry Andric   // that is one column after the start "/*".
5070b57cec5SDimitry Andric   DecorationColumn = StartColumn + 1;
5080b57cec5SDimitry Andric 
5090b57cec5SDimitry Andric   // Account for comment decoration patterns like this:
5100b57cec5SDimitry Andric   //
5110b57cec5SDimitry Andric   // /*
5120b57cec5SDimitry Andric   // ** blah blah blah
5130b57cec5SDimitry Andric   // */
5145f757f3fSDimitry Andric   if (Lines.size() >= 2 && Content[1].starts_with("**") &&
5150b57cec5SDimitry Andric       static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
5160b57cec5SDimitry Andric     DecorationColumn = StartColumn;
5170b57cec5SDimitry Andric   }
5180b57cec5SDimitry Andric 
5190b57cec5SDimitry Andric   Decoration = "* ";
5200b57cec5SDimitry Andric   if (Lines.size() == 1 && !FirstInLine) {
5210b57cec5SDimitry Andric     // Comments for which FirstInLine is false can start on arbitrary column,
5220b57cec5SDimitry Andric     // and available horizontal space can be too small to align consecutive
5230b57cec5SDimitry Andric     // lines with the first one.
5240b57cec5SDimitry Andric     // FIXME: We could, probably, align them to current indentation level, but
5250b57cec5SDimitry Andric     // now we just wrap them without stars.
5260b57cec5SDimitry Andric     Decoration = "";
5270b57cec5SDimitry Andric   }
52881ad6265SDimitry Andric   for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
52981ad6265SDimitry Andric     const StringRef &Text = Content[i];
53081ad6265SDimitry Andric     if (i + 1 == e) {
5310b57cec5SDimitry Andric       // If the last line is empty, the closing "*/" will have a star.
53281ad6265SDimitry Andric       if (Text.empty())
5330b57cec5SDimitry Andric         break;
5345f757f3fSDimitry Andric     } else if (!Text.empty() && Decoration.starts_with(Text)) {
5350b57cec5SDimitry Andric       continue;
53681ad6265SDimitry Andric     }
5375f757f3fSDimitry Andric     while (!Text.starts_with(Decoration))
53881ad6265SDimitry Andric       Decoration = Decoration.drop_back(1);
5390b57cec5SDimitry Andric   }
5400b57cec5SDimitry Andric 
5410b57cec5SDimitry Andric   LastLineNeedsDecoration = true;
5420b57cec5SDimitry Andric   IndentAtLineBreak = ContentColumn[0] + 1;
5430b57cec5SDimitry Andric   for (size_t i = 1, e = Lines.size(); i < e; ++i) {
5440b57cec5SDimitry Andric     if (Content[i].empty()) {
5450b57cec5SDimitry Andric       if (i + 1 == e) {
5460b57cec5SDimitry Andric         // Empty last line means that we already have a star as a part of the
5470b57cec5SDimitry Andric         // trailing */. We also need to preserve whitespace, so that */ is
5480b57cec5SDimitry Andric         // correctly indented.
5490b57cec5SDimitry Andric         LastLineNeedsDecoration = false;
5500b57cec5SDimitry Andric         // Align the star in the last '*/' with the stars on the previous lines.
55181ad6265SDimitry Andric         if (e >= 2 && !Decoration.empty())
5520b57cec5SDimitry Andric           ContentColumn[i] = DecorationColumn;
5530b57cec5SDimitry Andric       } else if (Decoration.empty()) {
5540b57cec5SDimitry Andric         // For all other lines, set the start column to 0 if they're empty, so
5550b57cec5SDimitry Andric         // we do not insert trailing whitespace anywhere.
5560b57cec5SDimitry Andric         ContentColumn[i] = 0;
5570b57cec5SDimitry Andric       }
5580b57cec5SDimitry Andric       continue;
5590b57cec5SDimitry Andric     }
5600b57cec5SDimitry Andric 
5610b57cec5SDimitry Andric     // The first line already excludes the star.
5620b57cec5SDimitry Andric     // The last line excludes the star if LastLineNeedsDecoration is false.
5630b57cec5SDimitry Andric     // For all other lines, adjust the line to exclude the star and
5640b57cec5SDimitry Andric     // (optionally) the first whitespace.
5655f757f3fSDimitry Andric     unsigned DecorationSize = Decoration.starts_with(Content[i])
5660b57cec5SDimitry Andric                                   ? Content[i].size()
5670b57cec5SDimitry Andric                                   : Decoration.size();
56881ad6265SDimitry Andric     if (DecorationSize)
5690b57cec5SDimitry Andric       ContentColumn[i] = DecorationColumn + DecorationSize;
5700b57cec5SDimitry Andric     Content[i] = Content[i].substr(DecorationSize);
5715f757f3fSDimitry Andric     if (!Decoration.starts_with(Content[i])) {
5720b57cec5SDimitry Andric       IndentAtLineBreak =
5730b57cec5SDimitry Andric           std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
5740b57cec5SDimitry Andric     }
57581ad6265SDimitry Andric   }
5760b57cec5SDimitry Andric   IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
5770b57cec5SDimitry Andric 
5780b57cec5SDimitry Andric   // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
5790eae32dcSDimitry Andric   if (Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) {
5805f757f3fSDimitry Andric     if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
5810b57cec5SDimitry Andric       // This is a multiline jsdoc comment.
5820b57cec5SDimitry Andric       DelimitersOnNewline = true;
5835f757f3fSDimitry Andric     } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
5840b57cec5SDimitry Andric       // Detect a long single-line comment, like:
5850b57cec5SDimitry Andric       // /** long long long */
5860b57cec5SDimitry Andric       // Below, '2' is the width of '*/'.
5870b57cec5SDimitry Andric       unsigned EndColumn =
5880b57cec5SDimitry Andric           ContentColumn[0] +
5890b57cec5SDimitry Andric           encoding::columnWidthWithTabs(Lines[0], ContentColumn[0],
5900b57cec5SDimitry Andric                                         Style.TabWidth, Encoding) +
5910b57cec5SDimitry Andric           2;
5920b57cec5SDimitry Andric       DelimitersOnNewline = EndColumn > Style.ColumnLimit;
5930b57cec5SDimitry Andric     }
5940b57cec5SDimitry Andric   }
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric   LLVM_DEBUG({
5970b57cec5SDimitry Andric     llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
5980b57cec5SDimitry Andric     llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
5990b57cec5SDimitry Andric     for (size_t i = 0; i < Lines.size(); ++i) {
6000b57cec5SDimitry Andric       llvm::dbgs() << i << " |" << Content[i] << "| "
6010b57cec5SDimitry Andric                    << "CC=" << ContentColumn[i] << "| "
6020b57cec5SDimitry Andric                    << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
6030b57cec5SDimitry Andric     }
6040b57cec5SDimitry Andric   });
6050b57cec5SDimitry Andric }
6060b57cec5SDimitry Andric 
607a7dea167SDimitry Andric BreakableToken::Split BreakableBlockComment::getSplit(
608a7dea167SDimitry Andric     unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
609480093f4SDimitry Andric     unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
6100b57cec5SDimitry Andric   // Don't break lines matching the comment pragmas regex.
6110b57cec5SDimitry Andric   if (CommentPragmasRegex.match(Content[LineIndex]))
6120b57cec5SDimitry Andric     return Split(StringRef::npos, 0);
6130b57cec5SDimitry Andric   return getCommentSplit(Content[LineIndex].substr(TailOffset),
6140b57cec5SDimitry Andric                          ContentStartColumn, ColumnLimit, Style.TabWidth,
6155f757f3fSDimitry Andric                          Encoding, Style, Decoration.ends_with("*"));
6160b57cec5SDimitry Andric }
6170b57cec5SDimitry Andric 
6180b57cec5SDimitry Andric void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
6190b57cec5SDimitry Andric                                              int IndentDelta) {
6200b57cec5SDimitry Andric   // When in a preprocessor directive, the trailing backslash in a block comment
6210b57cec5SDimitry Andric   // is not needed, but can serve a purpose of uniformity with necessary escaped
6220b57cec5SDimitry Andric   // newlines outside the comment. In this case we remove it here before
6230b57cec5SDimitry Andric   // trimming the trailing whitespace. The backslash will be re-added later when
6240b57cec5SDimitry Andric   // inserting a line break.
6250b57cec5SDimitry Andric   size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
6265f757f3fSDimitry Andric   if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
6270b57cec5SDimitry Andric     --EndOfPreviousLine;
6280b57cec5SDimitry Andric 
6290b57cec5SDimitry Andric   // Calculate the end of the non-whitespace text in the previous line.
6300b57cec5SDimitry Andric   EndOfPreviousLine =
6310b57cec5SDimitry Andric       Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
6320b57cec5SDimitry Andric   if (EndOfPreviousLine == StringRef::npos)
6330b57cec5SDimitry Andric     EndOfPreviousLine = 0;
6340b57cec5SDimitry Andric   else
6350b57cec5SDimitry Andric     ++EndOfPreviousLine;
6360b57cec5SDimitry Andric   // Calculate the start of the non-whitespace text in the current line.
6370b57cec5SDimitry Andric   size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
6380b57cec5SDimitry Andric   if (StartOfLine == StringRef::npos)
6390b57cec5SDimitry Andric     StartOfLine = Lines[LineIndex].size();
6400b57cec5SDimitry Andric 
6410b57cec5SDimitry Andric   StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
6420b57cec5SDimitry Andric   // Adjust Lines to only contain relevant text.
6430b57cec5SDimitry Andric   size_t PreviousContentOffset =
6440b57cec5SDimitry Andric       Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
6450b57cec5SDimitry Andric   Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
6460b57cec5SDimitry Andric       PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
6470b57cec5SDimitry Andric   Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
6480b57cec5SDimitry Andric 
6490b57cec5SDimitry Andric   // Adjust the start column uniformly across all lines.
6500b57cec5SDimitry Andric   ContentColumn[LineIndex] =
6510b57cec5SDimitry Andric       encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
6520b57cec5SDimitry Andric       IndentDelta;
6530b57cec5SDimitry Andric }
6540b57cec5SDimitry Andric 
6550b57cec5SDimitry Andric unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
6560b57cec5SDimitry Andric                                                unsigned Offset,
6570b57cec5SDimitry Andric                                                StringRef::size_type Length,
6580b57cec5SDimitry Andric                                                unsigned StartColumn) const {
6591fd87a68SDimitry Andric   return encoding::columnWidthWithTabs(
6601fd87a68SDimitry Andric       Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
6611fd87a68SDimitry Andric       Encoding);
6620b57cec5SDimitry Andric }
6630b57cec5SDimitry Andric 
6640b57cec5SDimitry Andric unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
6650b57cec5SDimitry Andric                                                    unsigned Offset,
6660b57cec5SDimitry Andric                                                    unsigned StartColumn) const {
6671fd87a68SDimitry Andric   unsigned LineLength =
6681fd87a68SDimitry Andric       UnbreakableTailLength +
6690b57cec5SDimitry Andric       getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
6701fd87a68SDimitry Andric   if (LineIndex + 1 == Lines.size()) {
6711fd87a68SDimitry Andric     LineLength += 2;
6721fd87a68SDimitry Andric     // We never need a decoration when breaking just the trailing "*/" postfix.
6731fd87a68SDimitry Andric     bool HasRemainingText = Offset < Content[LineIndex].size();
6741fd87a68SDimitry Andric     if (!HasRemainingText) {
6755f757f3fSDimitry Andric       bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
6761fd87a68SDimitry Andric       if (HasDecoration)
6771fd87a68SDimitry Andric         LineLength -= Decoration.size();
6781fd87a68SDimitry Andric     }
6791fd87a68SDimitry Andric   }
6801fd87a68SDimitry Andric   return LineLength;
6810b57cec5SDimitry Andric }
6820b57cec5SDimitry Andric 
6830b57cec5SDimitry Andric unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
6840b57cec5SDimitry Andric                                                       bool Break) const {
6850b57cec5SDimitry Andric   if (Break)
6860b57cec5SDimitry Andric     return IndentAtLineBreak;
6870b57cec5SDimitry Andric   return std::max(0, ContentColumn[LineIndex]);
6880b57cec5SDimitry Andric }
6890b57cec5SDimitry Andric 
6900b57cec5SDimitry Andric const llvm::StringSet<>
6910b57cec5SDimitry Andric     BreakableBlockComment::ContentIndentingJavadocAnnotations = {
6920b57cec5SDimitry Andric         "@param", "@return",     "@returns", "@throws",  "@type", "@template",
6930b57cec5SDimitry Andric         "@see",   "@deprecated", "@define",  "@exports", "@mods", "@private",
6940b57cec5SDimitry Andric };
6950b57cec5SDimitry Andric 
6960b57cec5SDimitry Andric unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
6970eae32dcSDimitry Andric   if (Style.Language != FormatStyle::LK_Java && !Style.isJavaScript())
6980b57cec5SDimitry Andric     return 0;
6990b57cec5SDimitry Andric   // The content at LineIndex 0 of a comment like:
7000b57cec5SDimitry Andric   // /** line 0 */
7010b57cec5SDimitry Andric   // is "* line 0", so we need to skip over the decoration in that case.
7020b57cec5SDimitry Andric   StringRef ContentWithNoDecoration = Content[LineIndex];
7035f757f3fSDimitry Andric   if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
7040b57cec5SDimitry Andric     ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
7050b57cec5SDimitry Andric   StringRef FirstWord = ContentWithNoDecoration.substr(
7060b57cec5SDimitry Andric       0, ContentWithNoDecoration.find_first_of(Blanks));
70706c3fb27SDimitry Andric   if (ContentIndentingJavadocAnnotations.contains(FirstWord))
7080b57cec5SDimitry Andric     return Style.ContinuationIndentWidth;
7090b57cec5SDimitry Andric   return 0;
7100b57cec5SDimitry Andric }
7110b57cec5SDimitry Andric 
7120b57cec5SDimitry Andric void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
7130b57cec5SDimitry Andric                                         Split Split, unsigned ContentIndent,
7140b57cec5SDimitry Andric                                         WhitespaceManager &Whitespaces) const {
7150b57cec5SDimitry Andric   StringRef Text = Content[LineIndex].substr(TailOffset);
7160b57cec5SDimitry Andric   StringRef Prefix = Decoration;
7170b57cec5SDimitry Andric   // We need this to account for the case when we have a decoration "* " for all
7180b57cec5SDimitry Andric   // the lines except for the last one, where the star in "*/" acts as a
7190b57cec5SDimitry Andric   // decoration.
7200b57cec5SDimitry Andric   unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
7210b57cec5SDimitry Andric   if (LineIndex + 1 == Lines.size() &&
7220b57cec5SDimitry Andric       Text.size() == Split.first + Split.second) {
7230b57cec5SDimitry Andric     // For the last line we need to break before "*/", but not to add "* ".
7240b57cec5SDimitry Andric     Prefix = "";
7250b57cec5SDimitry Andric     if (LocalIndentAtLineBreak >= 2)
7260b57cec5SDimitry Andric       LocalIndentAtLineBreak -= 2;
7270b57cec5SDimitry Andric   }
7280b57cec5SDimitry Andric   // The split offset is from the beginning of the line. Convert it to an offset
7290b57cec5SDimitry Andric   // from the beginning of the token text.
7300b57cec5SDimitry Andric   unsigned BreakOffsetInToken =
7310b57cec5SDimitry Andric       Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
7320b57cec5SDimitry Andric   unsigned CharsToRemove = Split.second;
7330b57cec5SDimitry Andric   assert(LocalIndentAtLineBreak >= Prefix.size());
7345ffd83dbSDimitry Andric   std::string PrefixWithTrailingIndent = std::string(Prefix);
7355ffd83dbSDimitry Andric   PrefixWithTrailingIndent.append(ContentIndent, ' ');
7360b57cec5SDimitry Andric   Whitespaces.replaceWhitespaceInToken(
7370b57cec5SDimitry Andric       tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
7380b57cec5SDimitry Andric       PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
7390b57cec5SDimitry Andric       /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
7400b57cec5SDimitry Andric           PrefixWithTrailingIndent.size());
7410b57cec5SDimitry Andric }
7420b57cec5SDimitry Andric 
743480093f4SDimitry Andric BreakableToken::Split BreakableBlockComment::getReflowSplit(
744480093f4SDimitry Andric     unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
7450b57cec5SDimitry Andric   if (!mayReflow(LineIndex, CommentPragmasRegex))
7460b57cec5SDimitry Andric     return Split(StringRef::npos, 0);
7470b57cec5SDimitry Andric 
7480b57cec5SDimitry Andric   // If we're reflowing into a line with content indent, only reflow the next
7490b57cec5SDimitry Andric   // line if its starting whitespace matches the content indent.
7500b57cec5SDimitry Andric   size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
7510b57cec5SDimitry Andric   if (LineIndex) {
7520b57cec5SDimitry Andric     unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
7530b57cec5SDimitry Andric     if (PreviousContentIndent && Trimmed != StringRef::npos &&
75481ad6265SDimitry Andric         Trimmed != PreviousContentIndent) {
7550b57cec5SDimitry Andric       return Split(StringRef::npos, 0);
7560b57cec5SDimitry Andric     }
75781ad6265SDimitry Andric   }
7580b57cec5SDimitry Andric 
7590b57cec5SDimitry Andric   return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
7600b57cec5SDimitry Andric }
7610b57cec5SDimitry Andric 
7620b57cec5SDimitry Andric bool BreakableBlockComment::introducesBreakBeforeToken() const {
7630b57cec5SDimitry Andric   // A break is introduced when we want delimiters on newline.
7640b57cec5SDimitry Andric   return DelimitersOnNewline &&
7650b57cec5SDimitry Andric          Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
7660b57cec5SDimitry Andric }
7670b57cec5SDimitry Andric 
7680b57cec5SDimitry Andric void BreakableBlockComment::reflow(unsigned LineIndex,
7690b57cec5SDimitry Andric                                    WhitespaceManager &Whitespaces) const {
7700b57cec5SDimitry Andric   StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
7710b57cec5SDimitry Andric   // Here we need to reflow.
7720b57cec5SDimitry Andric   assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
7730b57cec5SDimitry Andric          "Reflowing whitespace within a token");
7740b57cec5SDimitry Andric   // This is the offset of the end of the last line relative to the start of
7750b57cec5SDimitry Andric   // the token text in the token.
7760b57cec5SDimitry Andric   unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
7770b57cec5SDimitry Andric                                      Content[LineIndex - 1].size() -
7780b57cec5SDimitry Andric                                      tokenAt(LineIndex).TokenText.data();
7790b57cec5SDimitry Andric   unsigned WhitespaceLength = TrimmedContent.data() -
7800b57cec5SDimitry Andric                               tokenAt(LineIndex).TokenText.data() -
7810b57cec5SDimitry Andric                               WhitespaceOffsetInToken;
7820b57cec5SDimitry Andric   Whitespaces.replaceWhitespaceInToken(
7830b57cec5SDimitry Andric       tokenAt(LineIndex), WhitespaceOffsetInToken,
7840b57cec5SDimitry Andric       /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
7850b57cec5SDimitry Andric       /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
7860b57cec5SDimitry Andric       /*Spaces=*/0);
7870b57cec5SDimitry Andric }
7880b57cec5SDimitry Andric 
7890b57cec5SDimitry Andric void BreakableBlockComment::adaptStartOfLine(
7900b57cec5SDimitry Andric     unsigned LineIndex, WhitespaceManager &Whitespaces) const {
7910b57cec5SDimitry Andric   if (LineIndex == 0) {
7920b57cec5SDimitry Andric     if (DelimitersOnNewline) {
7930b57cec5SDimitry Andric       // Since we're breaking at index 1 below, the break position and the
7940b57cec5SDimitry Andric       // break length are the same.
7950b57cec5SDimitry Andric       // Note: this works because getCommentSplit is careful never to split at
7960b57cec5SDimitry Andric       // the beginning of a line.
7970b57cec5SDimitry Andric       size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
79881ad6265SDimitry Andric       if (BreakLength != StringRef::npos) {
7990b57cec5SDimitry Andric         insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
8000b57cec5SDimitry Andric                     Whitespaces);
8010b57cec5SDimitry Andric       }
80281ad6265SDimitry Andric     }
8030b57cec5SDimitry Andric     return;
8040b57cec5SDimitry Andric   }
8050b57cec5SDimitry Andric   // Here no reflow with the previous line will happen.
8060b57cec5SDimitry Andric   // Fix the decoration of the line at LineIndex.
8070b57cec5SDimitry Andric   StringRef Prefix = Decoration;
8080b57cec5SDimitry Andric   if (Content[LineIndex].empty()) {
8090b57cec5SDimitry Andric     if (LineIndex + 1 == Lines.size()) {
8100b57cec5SDimitry Andric       if (!LastLineNeedsDecoration) {
8110b57cec5SDimitry Andric         // If the last line was empty, we don't need a prefix, as the */ will
8120b57cec5SDimitry Andric         // line up with the decoration (if it exists).
8130b57cec5SDimitry Andric         Prefix = "";
8140b57cec5SDimitry Andric       }
8150b57cec5SDimitry Andric     } else if (!Decoration.empty()) {
8160b57cec5SDimitry Andric       // For other empty lines, if we do have a decoration, adapt it to not
8170b57cec5SDimitry Andric       // contain a trailing whitespace.
8180b57cec5SDimitry Andric       Prefix = Prefix.substr(0, 1);
8190b57cec5SDimitry Andric     }
82081ad6265SDimitry Andric   } else if (ContentColumn[LineIndex] == 1) {
8210b57cec5SDimitry Andric     // This line starts immediately after the decorating *.
8220b57cec5SDimitry Andric     Prefix = Prefix.substr(0, 1);
8230b57cec5SDimitry Andric   }
8240b57cec5SDimitry Andric   // This is the offset of the end of the last line relative to the start of the
8250b57cec5SDimitry Andric   // token text in the token.
8260b57cec5SDimitry Andric   unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
8270b57cec5SDimitry Andric                                      Content[LineIndex - 1].size() -
8280b57cec5SDimitry Andric                                      tokenAt(LineIndex).TokenText.data();
8290b57cec5SDimitry Andric   unsigned WhitespaceLength = Content[LineIndex].data() -
8300b57cec5SDimitry Andric                               tokenAt(LineIndex).TokenText.data() -
8310b57cec5SDimitry Andric                               WhitespaceOffsetInToken;
8320b57cec5SDimitry Andric   Whitespaces.replaceWhitespaceInToken(
8330b57cec5SDimitry Andric       tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
8340b57cec5SDimitry Andric       InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
8350b57cec5SDimitry Andric }
8360b57cec5SDimitry Andric 
8370b57cec5SDimitry Andric BreakableToken::Split
8380b57cec5SDimitry Andric BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
8390b57cec5SDimitry Andric   if (DelimitersOnNewline) {
8400b57cec5SDimitry Andric     // Replace the trailing whitespace of the last line with a newline.
8410b57cec5SDimitry Andric     // In case the last line is empty, the ending '*/' is already on its own
8420b57cec5SDimitry Andric     // line.
8430b57cec5SDimitry Andric     StringRef Line = Content.back().substr(TailOffset);
8440b57cec5SDimitry Andric     StringRef TrimmedLine = Line.rtrim(Blanks);
8450b57cec5SDimitry Andric     if (!TrimmedLine.empty())
8460b57cec5SDimitry Andric       return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
8470b57cec5SDimitry Andric   }
8480b57cec5SDimitry Andric   return Split(StringRef::npos, 0);
8490b57cec5SDimitry Andric }
8500b57cec5SDimitry Andric 
851480093f4SDimitry Andric bool BreakableBlockComment::mayReflow(
852480093f4SDimitry Andric     unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
8530b57cec5SDimitry Andric   // Content[LineIndex] may exclude the indent after the '*' decoration. In that
8540b57cec5SDimitry Andric   // case, we compute the start of the comment pragma manually.
8550b57cec5SDimitry Andric   StringRef IndentContent = Content[LineIndex];
8565f757f3fSDimitry Andric   if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
8570b57cec5SDimitry Andric     IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
8580b57cec5SDimitry Andric   return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
8590b57cec5SDimitry Andric          mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
8600b57cec5SDimitry Andric          !switchesFormatting(tokenAt(LineIndex));
8610b57cec5SDimitry Andric }
8620b57cec5SDimitry Andric 
8630b57cec5SDimitry Andric BreakableLineCommentSection::BreakableLineCommentSection(
864e8d8bef9SDimitry Andric     const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
8650b57cec5SDimitry Andric     encoding::Encoding Encoding, const FormatStyle &Style)
8660b57cec5SDimitry Andric     : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
8670b57cec5SDimitry Andric   assert(Tok.is(TT_LineComment) &&
8680b57cec5SDimitry Andric          "line comment section must start with a line comment");
8690b57cec5SDimitry Andric   FormatToken *LineTok = nullptr;
87056f451bbSDimitry Andric   const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
871fe6060f1SDimitry Andric   // How many spaces we changed in the first line of the section, this will be
872fe6060f1SDimitry Andric   // applied in all following lines
873fe6060f1SDimitry Andric   int FirstLineSpaceChange = 0;
8740b57cec5SDimitry Andric   for (const FormatToken *CurrentTok = &Tok;
8750b57cec5SDimitry Andric        CurrentTok && CurrentTok->is(TT_LineComment);
8760b57cec5SDimitry Andric        CurrentTok = CurrentTok->Next) {
8770b57cec5SDimitry Andric     LastLineTok = LineTok;
8780b57cec5SDimitry Andric     StringRef TokenText(CurrentTok->TokenText);
8795f757f3fSDimitry Andric     assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
8800b57cec5SDimitry Andric            "unsupported line comment prefix, '//' and '#' are supported");
8810b57cec5SDimitry Andric     size_t FirstLineIndex = Lines.size();
8820b57cec5SDimitry Andric     TokenText.split(Lines, "\n");
8830b57cec5SDimitry Andric     Content.resize(Lines.size());
8840b57cec5SDimitry Andric     ContentColumn.resize(Lines.size());
885fe6060f1SDimitry Andric     PrefixSpaceChange.resize(Lines.size());
8860b57cec5SDimitry Andric     Tokens.resize(Lines.size());
8870b57cec5SDimitry Andric     Prefix.resize(Lines.size());
8880b57cec5SDimitry Andric     OriginalPrefix.resize(Lines.size());
8890b57cec5SDimitry Andric     for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
8900b57cec5SDimitry Andric       Lines[i] = Lines[i].ltrim(Blanks);
891e8d8bef9SDimitry Andric       StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
892fe6060f1SDimitry Andric       OriginalPrefix[i] = IndentPrefix;
89356f451bbSDimitry Andric       const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
894fe6060f1SDimitry Andric 
89581ad6265SDimitry Andric       // This lambda also considers multibyte character that is not handled in
89681ad6265SDimitry Andric       // functions like isPunctuation provided by CharInfo.
89781ad6265SDimitry Andric       const auto NoSpaceBeforeFirstCommentChar = [&]() {
89881ad6265SDimitry Andric         assert(Lines[i].size() > IndentPrefix.size());
89981ad6265SDimitry Andric         const char FirstCommentChar = Lines[i][IndentPrefix.size()];
90081ad6265SDimitry Andric         const unsigned FirstCharByteSize =
90181ad6265SDimitry Andric             encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
90281ad6265SDimitry Andric         if (encoding::columnWidth(
90381ad6265SDimitry Andric                 Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
90481ad6265SDimitry Andric                 Encoding) != 1) {
90581ad6265SDimitry Andric           return false;
90681ad6265SDimitry Andric         }
90781ad6265SDimitry Andric         // In C-like comments, add a space before #. For example this is useful
90881ad6265SDimitry Andric         // to preserve the relative indentation when commenting out code with
90981ad6265SDimitry Andric         // #includes.
91081ad6265SDimitry Andric         //
91181ad6265SDimitry Andric         // In languages using # as the comment leader such as proto, don't
91281ad6265SDimitry Andric         // add a space to support patterns like:
91381ad6265SDimitry Andric         // #########
91481ad6265SDimitry Andric         // # section
91581ad6265SDimitry Andric         // #########
9165f757f3fSDimitry Andric         if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
91781ad6265SDimitry Andric           return false;
91881ad6265SDimitry Andric         return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
91981ad6265SDimitry Andric                isHorizontalWhitespace(FirstCommentChar);
92081ad6265SDimitry Andric       };
92181ad6265SDimitry Andric 
922fe6060f1SDimitry Andric       // On the first line of the comment section we calculate how many spaces
923fe6060f1SDimitry Andric       // are to be added or removed, all lines after that just get only the
924fe6060f1SDimitry Andric       // change and we will not look at the maximum anymore. Additionally to the
925fe6060f1SDimitry Andric       // actual first line, we calculate that when the non space Prefix changes,
926fe6060f1SDimitry Andric       // e.g. from "///" to "//".
927fe6060f1SDimitry Andric       if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
928fe6060f1SDimitry Andric                         OriginalPrefix[i - 1].rtrim(Blanks)) {
92956f451bbSDimitry Andric         if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
93081ad6265SDimitry Andric             !NoSpaceBeforeFirstCommentChar()) {
93156f451bbSDimitry Andric           FirstLineSpaceChange = Minimum - SpacesInPrefix;
93256f451bbSDimitry Andric         } else if (static_cast<unsigned>(SpacesInPrefix) >
93356f451bbSDimitry Andric                    Style.SpacesInLineCommentPrefix.Maximum) {
934fe6060f1SDimitry Andric           FirstLineSpaceChange =
935fe6060f1SDimitry Andric               Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
936fe6060f1SDimitry Andric         } else {
937fe6060f1SDimitry Andric           FirstLineSpaceChange = 0;
938fe6060f1SDimitry Andric         }
939fe6060f1SDimitry Andric       }
940fe6060f1SDimitry Andric 
941fe6060f1SDimitry Andric       if (Lines[i].size() != IndentPrefix.size()) {
942fe6060f1SDimitry Andric         PrefixSpaceChange[i] = FirstLineSpaceChange;
943fe6060f1SDimitry Andric 
94456f451bbSDimitry Andric         if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {
94556f451bbSDimitry Andric           PrefixSpaceChange[i] +=
94656f451bbSDimitry Andric               Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);
947fe6060f1SDimitry Andric         }
948fe6060f1SDimitry Andric 
949fe6060f1SDimitry Andric         assert(Lines[i].size() > IndentPrefix.size());
950fe6060f1SDimitry Andric         const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
95181ad6265SDimitry Andric         const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
95281ad6265SDimitry Andric         const bool LineRequiresLeadingSpace =
95381ad6265SDimitry Andric             !NoSpaceBeforeFirstCommentChar() ||
95481ad6265SDimitry Andric             (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
95581ad6265SDimitry Andric         const bool AllowsSpaceChange =
95681ad6265SDimitry Andric             !IsFormatComment &&
95781ad6265SDimitry Andric             (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
958fe6060f1SDimitry Andric 
959fe6060f1SDimitry Andric         if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
960fe6060f1SDimitry Andric           Prefix[i] = IndentPrefix.str();
961fe6060f1SDimitry Andric           Prefix[i].append(PrefixSpaceChange[i], ' ');
962fe6060f1SDimitry Andric         } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
963fe6060f1SDimitry Andric           Prefix[i] = IndentPrefix
964fe6060f1SDimitry Andric                           .drop_back(std::min<std::size_t>(
965fe6060f1SDimitry Andric                               -PrefixSpaceChange[i], SpacesInPrefix))
966fe6060f1SDimitry Andric                           .str();
967fe6060f1SDimitry Andric         } else {
968fe6060f1SDimitry Andric           Prefix[i] = IndentPrefix.str();
969fe6060f1SDimitry Andric         }
970fe6060f1SDimitry Andric       } else {
971fe6060f1SDimitry Andric         // If the IndentPrefix is the whole line, there is no content and we
972fe6060f1SDimitry Andric         // drop just all space
973fe6060f1SDimitry Andric         Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
9740b57cec5SDimitry Andric       }
9750b57cec5SDimitry Andric 
9760b57cec5SDimitry Andric       Tokens[i] = LineTok;
9770b57cec5SDimitry Andric       Content[i] = Lines[i].substr(IndentPrefix.size());
9780b57cec5SDimitry Andric       ContentColumn[i] =
9790b57cec5SDimitry Andric           StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn,
9800b57cec5SDimitry Andric                                                       Style.TabWidth, Encoding);
9810b57cec5SDimitry Andric 
9820b57cec5SDimitry Andric       // Calculate the end of the non-whitespace text in this line.
9830b57cec5SDimitry Andric       size_t EndOfLine = Content[i].find_last_not_of(Blanks);
9840b57cec5SDimitry Andric       if (EndOfLine == StringRef::npos)
9850b57cec5SDimitry Andric         EndOfLine = Content[i].size();
9860b57cec5SDimitry Andric       else
9870b57cec5SDimitry Andric         ++EndOfLine;
9880b57cec5SDimitry Andric       Content[i] = Content[i].substr(0, EndOfLine);
9890b57cec5SDimitry Andric     }
9900b57cec5SDimitry Andric     LineTok = CurrentTok->Next;
9910b57cec5SDimitry Andric     if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
9920b57cec5SDimitry Andric       // A line comment section needs to be broken by a line comment that is
9930b57cec5SDimitry Andric       // preceded by at least two newlines. Note that we put this break here
9940b57cec5SDimitry Andric       // instead of breaking at a previous stage during parsing, since that
9950b57cec5SDimitry Andric       // would split the contents of the enum into two unwrapped lines in this
9960b57cec5SDimitry Andric       // example, which is undesirable:
9970b57cec5SDimitry Andric       // enum A {
9980b57cec5SDimitry Andric       //   a, // comment about a
9990b57cec5SDimitry Andric       //
10000b57cec5SDimitry Andric       //   // comment about b
10010b57cec5SDimitry Andric       //   b
10020b57cec5SDimitry Andric       // };
10030b57cec5SDimitry Andric       //
10040b57cec5SDimitry Andric       // FIXME: Consider putting separate line comment sections as children to
10050b57cec5SDimitry Andric       // the unwrapped line instead.
10060b57cec5SDimitry Andric       break;
10070b57cec5SDimitry Andric     }
10080b57cec5SDimitry Andric   }
10090b57cec5SDimitry Andric }
10100b57cec5SDimitry Andric 
10110b57cec5SDimitry Andric unsigned
10120b57cec5SDimitry Andric BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
10130b57cec5SDimitry Andric                                             StringRef::size_type Length,
10140b57cec5SDimitry Andric                                             unsigned StartColumn) const {
10150b57cec5SDimitry Andric   return encoding::columnWidthWithTabs(
10160b57cec5SDimitry Andric       Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
10170b57cec5SDimitry Andric       Encoding);
10180b57cec5SDimitry Andric }
10190b57cec5SDimitry Andric 
1020fe6060f1SDimitry Andric unsigned
1021fe6060f1SDimitry Andric BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
1022fe6060f1SDimitry Andric                                                    bool /*Break*/) const {
10230b57cec5SDimitry Andric   return ContentColumn[LineIndex];
10240b57cec5SDimitry Andric }
10250b57cec5SDimitry Andric 
10260b57cec5SDimitry Andric void BreakableLineCommentSection::insertBreak(
10270b57cec5SDimitry Andric     unsigned LineIndex, unsigned TailOffset, Split Split,
10280b57cec5SDimitry Andric     unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
10290b57cec5SDimitry Andric   StringRef Text = Content[LineIndex].substr(TailOffset);
10300b57cec5SDimitry Andric   // Compute the offset of the split relative to the beginning of the token
10310b57cec5SDimitry Andric   // text.
10320b57cec5SDimitry Andric   unsigned BreakOffsetInToken =
10330b57cec5SDimitry Andric       Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
10340b57cec5SDimitry Andric   unsigned CharsToRemove = Split.second;
10350b57cec5SDimitry Andric   Whitespaces.replaceWhitespaceInToken(
10360b57cec5SDimitry Andric       tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
10370b57cec5SDimitry Andric       Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
1038fe6060f1SDimitry Andric       /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
10390b57cec5SDimitry Andric }
10400b57cec5SDimitry Andric 
10410b57cec5SDimitry Andric BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
1042480093f4SDimitry Andric     unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
10430b57cec5SDimitry Andric   if (!mayReflow(LineIndex, CommentPragmasRegex))
10440b57cec5SDimitry Andric     return Split(StringRef::npos, 0);
10450b57cec5SDimitry Andric 
10460b57cec5SDimitry Andric   size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
10470b57cec5SDimitry Andric 
10480b57cec5SDimitry Andric   // In a line comment section each line is a separate token; thus, after a
10490b57cec5SDimitry Andric   // split we replace all whitespace before the current line comment token
10500b57cec5SDimitry Andric   // (which does not need to be included in the split), plus the start of the
10510b57cec5SDimitry Andric   // line up to where the content starts.
10520b57cec5SDimitry Andric   return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
10530b57cec5SDimitry Andric }
10540b57cec5SDimitry Andric 
10550b57cec5SDimitry Andric void BreakableLineCommentSection::reflow(unsigned LineIndex,
10560b57cec5SDimitry Andric                                          WhitespaceManager &Whitespaces) const {
10570b57cec5SDimitry Andric   if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
10580b57cec5SDimitry Andric     // Reflow happens between tokens. Replace the whitespace between the
10590b57cec5SDimitry Andric     // tokens by the empty string.
10600b57cec5SDimitry Andric     Whitespaces.replaceWhitespace(
10610b57cec5SDimitry Andric         *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
10625ffd83dbSDimitry Andric         /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
10635ffd83dbSDimitry Andric         /*InPPDirective=*/false);
10640b57cec5SDimitry Andric   } else if (LineIndex > 0) {
10650b57cec5SDimitry Andric     // In case we're reflowing after the '\' in:
10660b57cec5SDimitry Andric     //
10670b57cec5SDimitry Andric     //   // line comment \
10680b57cec5SDimitry Andric     //   // line 2
10690b57cec5SDimitry Andric     //
10700b57cec5SDimitry Andric     // the reflow happens inside the single comment token (it is a single line
10710b57cec5SDimitry Andric     // comment with an unescaped newline).
10720b57cec5SDimitry Andric     // Replace the whitespace between the '\' and '//' with the empty string.
10730b57cec5SDimitry Andric     //
10740b57cec5SDimitry Andric     // Offset points to after the '\' relative to start of the token.
10750b57cec5SDimitry Andric     unsigned Offset = Lines[LineIndex - 1].data() +
10760b57cec5SDimitry Andric                       Lines[LineIndex - 1].size() -
10770b57cec5SDimitry Andric                       tokenAt(LineIndex - 1).TokenText.data();
10780b57cec5SDimitry Andric     // WhitespaceLength is the number of chars between the '\' and the '//' on
10790b57cec5SDimitry Andric     // the next line.
10800b57cec5SDimitry Andric     unsigned WhitespaceLength =
10810b57cec5SDimitry Andric         Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
10820b57cec5SDimitry Andric     Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
10830b57cec5SDimitry Andric                                          /*ReplaceChars=*/WhitespaceLength,
10840b57cec5SDimitry Andric                                          /*PreviousPostfix=*/"",
10850b57cec5SDimitry Andric                                          /*CurrentPrefix=*/"",
10860b57cec5SDimitry Andric                                          /*InPPDirective=*/false,
10870b57cec5SDimitry Andric                                          /*Newlines=*/0,
10880b57cec5SDimitry Andric                                          /*Spaces=*/0);
10890b57cec5SDimitry Andric   }
10900b57cec5SDimitry Andric   // Replace the indent and prefix of the token with the reflow prefix.
10910b57cec5SDimitry Andric   unsigned Offset =
10920b57cec5SDimitry Andric       Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
10930b57cec5SDimitry Andric   unsigned WhitespaceLength =
10940b57cec5SDimitry Andric       Content[LineIndex].data() - Lines[LineIndex].data();
10950b57cec5SDimitry Andric   Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
10960b57cec5SDimitry Andric                                        /*ReplaceChars=*/WhitespaceLength,
10970b57cec5SDimitry Andric                                        /*PreviousPostfix=*/"",
10980b57cec5SDimitry Andric                                        /*CurrentPrefix=*/ReflowPrefix,
10990b57cec5SDimitry Andric                                        /*InPPDirective=*/false,
11000b57cec5SDimitry Andric                                        /*Newlines=*/0,
11010b57cec5SDimitry Andric                                        /*Spaces=*/0);
11020b57cec5SDimitry Andric }
11030b57cec5SDimitry Andric 
11040b57cec5SDimitry Andric void BreakableLineCommentSection::adaptStartOfLine(
11050b57cec5SDimitry Andric     unsigned LineIndex, WhitespaceManager &Whitespaces) const {
11060b57cec5SDimitry Andric   // If this is the first line of a token, we need to inform Whitespace Manager
11070b57cec5SDimitry Andric   // about it: either adapt the whitespace range preceding it, or mark it as an
11080b57cec5SDimitry Andric   // untouchable token.
11090b57cec5SDimitry Andric   // This happens for instance here:
11100b57cec5SDimitry Andric   // // line 1 \
11110b57cec5SDimitry Andric   // // line 2
11120b57cec5SDimitry Andric   if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
11130b57cec5SDimitry Andric     // This is the first line for the current token, but no reflow with the
11140b57cec5SDimitry Andric     // previous token is necessary. However, we still may need to adjust the
11150b57cec5SDimitry Andric     // start column. Note that ContentColumn[LineIndex] is the expected
11160b57cec5SDimitry Andric     // content column after a possible update to the prefix, hence the prefix
11170b57cec5SDimitry Andric     // length change is included.
11180b57cec5SDimitry Andric     unsigned LineColumn =
11190b57cec5SDimitry Andric         ContentColumn[LineIndex] -
11200b57cec5SDimitry Andric         (Content[LineIndex].data() - Lines[LineIndex].data()) +
11210b57cec5SDimitry Andric         (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
11220b57cec5SDimitry Andric 
11230b57cec5SDimitry Andric     // We always want to create a replacement instead of adding an untouchable
11240b57cec5SDimitry Andric     // token, even if LineColumn is the same as the original column of the
11250b57cec5SDimitry Andric     // token. This is because WhitespaceManager doesn't align trailing
11260b57cec5SDimitry Andric     // comments if they are untouchable.
11270b57cec5SDimitry Andric     Whitespaces.replaceWhitespace(*Tokens[LineIndex],
11280b57cec5SDimitry Andric                                   /*Newlines=*/1,
11290b57cec5SDimitry Andric                                   /*Spaces=*/LineColumn,
11300b57cec5SDimitry Andric                                   /*StartOfTokenColumn=*/LineColumn,
11315ffd83dbSDimitry Andric                                   /*IsAligned=*/true,
11320b57cec5SDimitry Andric                                   /*InPPDirective=*/false);
11330b57cec5SDimitry Andric   }
11340b57cec5SDimitry Andric   if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
11350b57cec5SDimitry Andric     // Adjust the prefix if necessary.
1136fe6060f1SDimitry Andric     const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
1137fe6060f1SDimitry Andric     const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
11380b57cec5SDimitry Andric     Whitespaces.replaceWhitespaceInToken(
1139fe6060f1SDimitry Andric         tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
1140fe6060f1SDimitry Andric         /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
1141fe6060f1SDimitry Andric         /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
11420b57cec5SDimitry Andric   }
11430b57cec5SDimitry Andric }
11440b57cec5SDimitry Andric 
11450b57cec5SDimitry Andric void BreakableLineCommentSection::updateNextToken(LineState &State) const {
114681ad6265SDimitry Andric   if (LastLineTok)
11470b57cec5SDimitry Andric     State.NextToken = LastLineTok->Next;
11480b57cec5SDimitry Andric }
11490b57cec5SDimitry Andric 
11500b57cec5SDimitry Andric bool BreakableLineCommentSection::mayReflow(
1151480093f4SDimitry Andric     unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
11520b57cec5SDimitry Andric   // Line comments have the indent as part of the prefix, so we need to
11530b57cec5SDimitry Andric   // recompute the start of the line.
11540b57cec5SDimitry Andric   StringRef IndentContent = Content[LineIndex];
11555f757f3fSDimitry Andric   if (Lines[LineIndex].starts_with("//"))
11560b57cec5SDimitry Andric     IndentContent = Lines[LineIndex].substr(2);
11570b57cec5SDimitry Andric   // FIXME: Decide whether we want to reflow non-regular indents:
11580b57cec5SDimitry Andric   // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
11590b57cec5SDimitry Andric   // OriginalPrefix[LineIndex-1]. That means we don't reflow
11600b57cec5SDimitry Andric   // // text that protrudes
11610b57cec5SDimitry Andric   // //    into text with different indent
11620b57cec5SDimitry Andric   // We do reflow in that case in block comments.
11630b57cec5SDimitry Andric   return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
11640b57cec5SDimitry Andric          mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
11650b57cec5SDimitry Andric          !switchesFormatting(tokenAt(LineIndex)) &&
11660b57cec5SDimitry Andric          OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
11670b57cec5SDimitry Andric }
11680b57cec5SDimitry Andric 
11690b57cec5SDimitry Andric } // namespace format
11700b57cec5SDimitry Andric } // namespace clang
1171