10b57cec5SDimitry Andric //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric /// 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// Contains implementation of BreakableToken class and classes derived 110b57cec5SDimitry Andric /// from it. 120b57cec5SDimitry Andric /// 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "BreakableToken.h" 160b57cec5SDimitry Andric #include "ContinuationIndenter.h" 170b57cec5SDimitry Andric #include "clang/Basic/CharInfo.h" 180b57cec5SDimitry Andric #include "clang/Format/Format.h" 190b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 200b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 210b57cec5SDimitry Andric #include <algorithm> 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric #define DEBUG_TYPE "format-token-breaker" 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric namespace clang { 260b57cec5SDimitry Andric namespace format { 270b57cec5SDimitry Andric 28e8d8bef9SDimitry Andric static constexpr StringRef Blanks = " \t\v\f\r"; 290b57cec5SDimitry Andric static bool IsBlank(char C) { 300b57cec5SDimitry Andric switch (C) { 310b57cec5SDimitry Andric case ' ': 320b57cec5SDimitry Andric case '\t': 330b57cec5SDimitry Andric case '\v': 340b57cec5SDimitry Andric case '\f': 350b57cec5SDimitry Andric case '\r': 360b57cec5SDimitry Andric return true; 370b57cec5SDimitry Andric default: 380b57cec5SDimitry Andric return false; 390b57cec5SDimitry Andric } 400b57cec5SDimitry Andric } 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric static StringRef getLineCommentIndentPrefix(StringRef Comment, 430b57cec5SDimitry Andric const FormatStyle &Style) { 44e8d8bef9SDimitry Andric static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///", 45e8d8bef9SDimitry Andric "//!", "//:", "//"}; 46e8d8bef9SDimitry Andric static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##", 47e8d8bef9SDimitry Andric "//", "#"}; 48e8d8bef9SDimitry Andric ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes); 490b57cec5SDimitry Andric if (Style.Language == FormatStyle::LK_TextProto) 500b57cec5SDimitry Andric KnownPrefixes = KnownTextProtoPrefixes; 510b57cec5SDimitry Andric 52bdd1243dSDimitry Andric assert( 53bdd1243dSDimitry Andric llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept { 54e8d8bef9SDimitry Andric return Lhs.size() > Rhs.size(); 55e8d8bef9SDimitry Andric })); 56e8d8bef9SDimitry Andric 570b57cec5SDimitry Andric for (StringRef KnownPrefix : KnownPrefixes) { 585f757f3fSDimitry Andric if (Comment.starts_with(KnownPrefix)) { 59e8d8bef9SDimitry Andric const auto PrefixLength = 60e8d8bef9SDimitry Andric Comment.find_first_not_of(' ', KnownPrefix.size()); 61e8d8bef9SDimitry Andric return Comment.substr(0, PrefixLength); 620b57cec5SDimitry Andric } 630b57cec5SDimitry Andric } 64e8d8bef9SDimitry Andric return {}; 650b57cec5SDimitry Andric } 660b57cec5SDimitry Andric 670b57cec5SDimitry Andric static BreakableToken::Split 680b57cec5SDimitry Andric getCommentSplit(StringRef Text, unsigned ContentStartColumn, 690b57cec5SDimitry Andric unsigned ColumnLimit, unsigned TabWidth, 700b57cec5SDimitry Andric encoding::Encoding Encoding, const FormatStyle &Style, 710b57cec5SDimitry Andric bool DecorationEndsWithStar = false) { 720b57cec5SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text 730b57cec5SDimitry Andric << "\", Column limit: " << ColumnLimit 740b57cec5SDimitry Andric << ", Content start: " << ContentStartColumn << "\n"); 750b57cec5SDimitry Andric if (ColumnLimit <= ContentStartColumn + 1) 760b57cec5SDimitry Andric return BreakableToken::Split(StringRef::npos, 0); 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 790b57cec5SDimitry Andric unsigned MaxSplitBytes = 0; 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric for (unsigned NumChars = 0; 820b57cec5SDimitry Andric NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 830b57cec5SDimitry Andric unsigned BytesInChar = 840b57cec5SDimitry Andric encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 8506c3fb27SDimitry Andric NumChars += encoding::columnWidthWithTabs( 8606c3fb27SDimitry Andric Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars, 8706c3fb27SDimitry Andric TabWidth, Encoding); 880b57cec5SDimitry Andric MaxSplitBytes += BytesInChar; 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric // In JavaScript, some @tags can be followed by {, and machinery that parses 920b57cec5SDimitry Andric // these comments will fail to understand the comment if followed by a line 930b57cec5SDimitry Andric // break. So avoid ever breaking before a {. 940eae32dcSDimitry Andric if (Style.isJavaScript()) { 95e8d8bef9SDimitry Andric StringRef::size_type SpaceOffset = 96e8d8bef9SDimitry Andric Text.find_first_of(Blanks, MaxSplitBytes); 97e8d8bef9SDimitry Andric if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() && 98e8d8bef9SDimitry Andric Text[SpaceOffset + 1] == '{') { 99e8d8bef9SDimitry Andric MaxSplitBytes = SpaceOffset + 1; 100e8d8bef9SDimitry Andric } 101e8d8bef9SDimitry Andric } 102e8d8bef9SDimitry Andric 103e8d8bef9SDimitry Andric StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 104e8d8bef9SDimitry Andric 105e8d8bef9SDimitry Andric static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); 106e8d8bef9SDimitry Andric // Some spaces are unacceptable to break on, rewind past them. 107e8d8bef9SDimitry Andric while (SpaceOffset != StringRef::npos) { 108e8d8bef9SDimitry Andric // If a line-comment ends with `\`, the next line continues the comment, 109e8d8bef9SDimitry Andric // whether or not it starts with `//`. This is confusing and triggers 110e8d8bef9SDimitry Andric // -Wcomment. 111e8d8bef9SDimitry Andric // Avoid introducing multiline comments by not allowing a break right 112e8d8bef9SDimitry Andric // after '\'. 113e8d8bef9SDimitry Andric if (Style.isCpp()) { 114e8d8bef9SDimitry Andric StringRef::size_type LastNonBlank = 115e8d8bef9SDimitry Andric Text.find_last_not_of(Blanks, SpaceOffset); 116e8d8bef9SDimitry Andric if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') { 117e8d8bef9SDimitry Andric SpaceOffset = Text.find_last_of(Blanks, LastNonBlank); 118e8d8bef9SDimitry Andric continue; 119e8d8bef9SDimitry Andric } 120e8d8bef9SDimitry Andric } 121e8d8bef9SDimitry Andric 122e8d8bef9SDimitry Andric // Do not split before a number followed by a dot: this would be interpreted 123e8d8bef9SDimitry Andric // as a numbered list, which would prevent re-flowing in subsequent passes. 124e8d8bef9SDimitry Andric if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) { 1250b57cec5SDimitry Andric SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); 126e8d8bef9SDimitry Andric continue; 127e8d8bef9SDimitry Andric } 128e8d8bef9SDimitry Andric 129e8d8bef9SDimitry Andric // Avoid ever breaking before a @tag or a { in JavaScript. 1300eae32dcSDimitry Andric if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() && 131e8d8bef9SDimitry Andric (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) { 132e8d8bef9SDimitry Andric SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); 133e8d8bef9SDimitry Andric continue; 134e8d8bef9SDimitry Andric } 135e8d8bef9SDimitry Andric 1360b57cec5SDimitry Andric break; 1370b57cec5SDimitry Andric } 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric if (SpaceOffset == StringRef::npos || 1400b57cec5SDimitry Andric // Don't break at leading whitespace. 1410b57cec5SDimitry Andric Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 1420b57cec5SDimitry Andric // Make sure that we don't break at leading whitespace that 1430b57cec5SDimitry Andric // reaches past MaxSplit. 1440b57cec5SDimitry Andric StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 14581ad6265SDimitry Andric if (FirstNonWhitespace == StringRef::npos) { 1460b57cec5SDimitry Andric // If the comment is only whitespace, we cannot split. 1470b57cec5SDimitry Andric return BreakableToken::Split(StringRef::npos, 0); 14881ad6265SDimitry Andric } 1490b57cec5SDimitry Andric SpaceOffset = Text.find_first_of( 1500b57cec5SDimitry Andric Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 1510b57cec5SDimitry Andric } 1520b57cec5SDimitry Andric if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 1530b57cec5SDimitry Andric // adaptStartOfLine will break after lines starting with /** if the comment 1540b57cec5SDimitry Andric // is broken anywhere. Avoid emitting this break twice here. 1550b57cec5SDimitry Andric // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will 1560b57cec5SDimitry Andric // insert a break after /**, so this code must not insert the same break. 1570b57cec5SDimitry Andric if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*') 1580b57cec5SDimitry Andric return BreakableToken::Split(StringRef::npos, 0); 1590b57cec5SDimitry Andric StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 1600b57cec5SDimitry Andric StringRef AfterCut = Text.substr(SpaceOffset); 1610b57cec5SDimitry Andric // Don't trim the leading blanks if it would create a */ after the break. 1620b57cec5SDimitry Andric if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/') 1630b57cec5SDimitry Andric AfterCut = AfterCut.ltrim(Blanks); 1640b57cec5SDimitry Andric return BreakableToken::Split(BeforeCut.size(), 1650b57cec5SDimitry Andric AfterCut.begin() - BeforeCut.end()); 1660b57cec5SDimitry Andric } 1670b57cec5SDimitry Andric return BreakableToken::Split(StringRef::npos, 0); 1680b57cec5SDimitry Andric } 1690b57cec5SDimitry Andric 1700b57cec5SDimitry Andric static BreakableToken::Split 1710b57cec5SDimitry Andric getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, 1720b57cec5SDimitry Andric unsigned TabWidth, encoding::Encoding Encoding) { 1730b57cec5SDimitry Andric // FIXME: Reduce unit test case. 1740b57cec5SDimitry Andric if (Text.empty()) 1750b57cec5SDimitry Andric return BreakableToken::Split(StringRef::npos, 0); 1760b57cec5SDimitry Andric if (ColumnLimit <= UsedColumns) 1770b57cec5SDimitry Andric return BreakableToken::Split(StringRef::npos, 0); 1780b57cec5SDimitry Andric unsigned MaxSplit = ColumnLimit - UsedColumns; 1790b57cec5SDimitry Andric StringRef::size_type SpaceOffset = 0; 1800b57cec5SDimitry Andric StringRef::size_type SlashOffset = 0; 1810b57cec5SDimitry Andric StringRef::size_type WordStartOffset = 0; 1820b57cec5SDimitry Andric StringRef::size_type SplitPoint = 0; 1830b57cec5SDimitry Andric for (unsigned Chars = 0;;) { 1840b57cec5SDimitry Andric unsigned Advance; 1850b57cec5SDimitry Andric if (Text[0] == '\\') { 1860b57cec5SDimitry Andric Advance = encoding::getEscapeSequenceLength(Text); 1870b57cec5SDimitry Andric Chars += Advance; 1880b57cec5SDimitry Andric } else { 1890b57cec5SDimitry Andric Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 1900b57cec5SDimitry Andric Chars += encoding::columnWidthWithTabs( 1910b57cec5SDimitry Andric Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric if (Chars > MaxSplit || Text.size() <= Advance) 1950b57cec5SDimitry Andric break; 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric if (IsBlank(Text[0])) 1980b57cec5SDimitry Andric SpaceOffset = SplitPoint; 1990b57cec5SDimitry Andric if (Text[0] == '/') 2000b57cec5SDimitry Andric SlashOffset = SplitPoint; 2010b57cec5SDimitry Andric if (Advance == 1 && !isAlphanumeric(Text[0])) 2020b57cec5SDimitry Andric WordStartOffset = SplitPoint; 2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric SplitPoint += Advance; 2050b57cec5SDimitry Andric Text = Text.substr(Advance); 2060b57cec5SDimitry Andric } 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric if (SpaceOffset != 0) 2090b57cec5SDimitry Andric return BreakableToken::Split(SpaceOffset + 1, 0); 2100b57cec5SDimitry Andric if (SlashOffset != 0) 2110b57cec5SDimitry Andric return BreakableToken::Split(SlashOffset + 1, 0); 2120b57cec5SDimitry Andric if (WordStartOffset != 0) 2130b57cec5SDimitry Andric return BreakableToken::Split(WordStartOffset + 1, 0); 2140b57cec5SDimitry Andric if (SplitPoint != 0) 2150b57cec5SDimitry Andric return BreakableToken::Split(SplitPoint, 0); 2160b57cec5SDimitry Andric return BreakableToken::Split(StringRef::npos, 0); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric bool switchesFormatting(const FormatToken &Token) { 2200b57cec5SDimitry Andric assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) && 2210b57cec5SDimitry Andric "formatting regions are switched by comment tokens"); 2220b57cec5SDimitry Andric StringRef Content = Token.TokenText.substr(2).ltrim(); 2235f757f3fSDimitry Andric return Content.starts_with("clang-format on") || 2245f757f3fSDimitry Andric Content.starts_with("clang-format off"); 2250b57cec5SDimitry Andric } 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric unsigned 2280b57cec5SDimitry Andric BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns, 2290b57cec5SDimitry Andric Split Split) const { 2300b57cec5SDimitry Andric // Example: consider the content 2310b57cec5SDimitry Andric // lala lala 2320b57cec5SDimitry Andric // - RemainingTokenColumns is the original number of columns, 10; 2330b57cec5SDimitry Andric // - Split is (4, 2), denoting the two spaces between the two words; 2340b57cec5SDimitry Andric // 2350b57cec5SDimitry Andric // We compute the number of columns when the split is compressed into a single 2360b57cec5SDimitry Andric // space, like: 2370b57cec5SDimitry Andric // lala lala 2380b57cec5SDimitry Andric // 2390b57cec5SDimitry Andric // FIXME: Correctly measure the length of whitespace in Split.second so it 2400b57cec5SDimitry Andric // works with tabs. 2410b57cec5SDimitry Andric return RemainingTokenColumns + 1 - Split.second; 2420b57cec5SDimitry Andric } 2430b57cec5SDimitry Andric 2440b57cec5SDimitry Andric unsigned BreakableStringLiteral::getLineCount() const { return 1; } 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex, 2470b57cec5SDimitry Andric unsigned Offset, 2480b57cec5SDimitry Andric StringRef::size_type Length, 2490b57cec5SDimitry Andric unsigned StartColumn) const { 2500b57cec5SDimitry Andric llvm_unreachable("Getting the length of a part of the string literal " 2510b57cec5SDimitry Andric "indicates that the code tries to reflow it."); 2520b57cec5SDimitry Andric } 2530b57cec5SDimitry Andric 2540b57cec5SDimitry Andric unsigned 2550b57cec5SDimitry Andric BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset, 2560b57cec5SDimitry Andric unsigned StartColumn) const { 2570b57cec5SDimitry Andric return UnbreakableTailLength + Postfix.size() + 2581fd87a68SDimitry Andric encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn, 2591fd87a68SDimitry Andric Style.TabWidth, Encoding); 2600b57cec5SDimitry Andric } 2610b57cec5SDimitry Andric 2620b57cec5SDimitry Andric unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex, 2630b57cec5SDimitry Andric bool Break) const { 2640b57cec5SDimitry Andric return StartColumn + Prefix.size(); 2650b57cec5SDimitry Andric } 2660b57cec5SDimitry Andric 2670b57cec5SDimitry Andric BreakableStringLiteral::BreakableStringLiteral( 2680b57cec5SDimitry Andric const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, 2690b57cec5SDimitry Andric StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, 2700b57cec5SDimitry Andric encoding::Encoding Encoding, const FormatStyle &Style) 2710b57cec5SDimitry Andric : BreakableToken(Tok, InPPDirective, Encoding, Style), 2720b57cec5SDimitry Andric StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix), 2730b57cec5SDimitry Andric UnbreakableTailLength(UnbreakableTailLength) { 2745f757f3fSDimitry Andric assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix)); 2750b57cec5SDimitry Andric Line = Tok.TokenText.substr( 2760b57cec5SDimitry Andric Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 2770b57cec5SDimitry Andric } 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric BreakableToken::Split BreakableStringLiteral::getSplit( 2800b57cec5SDimitry Andric unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, 281480093f4SDimitry Andric unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const { 2820b57cec5SDimitry Andric return getStringSplit(Line.substr(TailOffset), ContentStartColumn, 2830b57cec5SDimitry Andric ColumnLimit - Postfix.size(), Style.TabWidth, Encoding); 2840b57cec5SDimitry Andric } 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric void BreakableStringLiteral::insertBreak(unsigned LineIndex, 2870b57cec5SDimitry Andric unsigned TailOffset, Split Split, 2880b57cec5SDimitry Andric unsigned ContentIndent, 2890b57cec5SDimitry Andric WhitespaceManager &Whitespaces) const { 2900b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken( 2910b57cec5SDimitry Andric Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 2920b57cec5SDimitry Andric Prefix, InPPDirective, 1, StartColumn); 2930b57cec5SDimitry Andric } 2940b57cec5SDimitry Andric 2955f757f3fSDimitry Andric BreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators( 2965f757f3fSDimitry Andric const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus, 2975f757f3fSDimitry Andric unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective, 2985f757f3fSDimitry Andric encoding::Encoding Encoding, const FormatStyle &Style) 2995f757f3fSDimitry Andric : BreakableStringLiteral( 3005f757f3fSDimitry Andric Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'" 3015f757f3fSDimitry Andric : QuoteStyle == AtDoubleQuotes ? "@\"" 3025f757f3fSDimitry Andric : "\"", 3035f757f3fSDimitry Andric /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"", 3045f757f3fSDimitry Andric UnbreakableTailLength, InPPDirective, Encoding, Style), 3055f757f3fSDimitry Andric BracesNeeded(Tok.isNot(TT_StringInConcatenation)), 3065f757f3fSDimitry Andric QuoteStyle(QuoteStyle) { 3075f757f3fSDimitry Andric // Find the replacement text for inserting braces and quotes and line breaks. 3085f757f3fSDimitry Andric // We don't create an allocated string concatenated from parts here because it 3095f757f3fSDimitry Andric // has to outlive the BreakableStringliteral object. The brace replacements 3105f757f3fSDimitry Andric // include a quote so that WhitespaceManager can tell it apart from whitespace 3115f757f3fSDimitry Andric // replacements between the string and surrounding tokens. 3125f757f3fSDimitry Andric 3135f757f3fSDimitry Andric // The option is not implemented in JavaScript. 3145f757f3fSDimitry Andric bool SignOnNewLine = 3155f757f3fSDimitry Andric !Style.isJavaScript() && 3165f757f3fSDimitry Andric Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; 3175f757f3fSDimitry Andric 3185f757f3fSDimitry Andric if (Style.isVerilog()) { 3195f757f3fSDimitry Andric // In Verilog, all strings are quoted by double quotes, joined by commas, 3205f757f3fSDimitry Andric // and wrapped in braces. The comma is always before the newline. 3215f757f3fSDimitry Andric assert(QuoteStyle == DoubleQuotes); 3225f757f3fSDimitry Andric LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \""; 3235f757f3fSDimitry Andric RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }"; 3245f757f3fSDimitry Andric Postfix = "\","; 3255f757f3fSDimitry Andric Prefix = "\""; 3265f757f3fSDimitry Andric } else { 3275f757f3fSDimitry Andric // The plus sign may be on either line. And also C# and JavaScript have 3285f757f3fSDimitry Andric // several quoting styles. 3295f757f3fSDimitry Andric if (QuoteStyle == SingleQuotes) { 3305f757f3fSDimitry Andric LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('"; 3315f757f3fSDimitry Andric RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')"; 3325f757f3fSDimitry Andric Postfix = SignOnNewLine ? "'" : "' +"; 3335f757f3fSDimitry Andric Prefix = SignOnNewLine ? "+ '" : "'"; 3345f757f3fSDimitry Andric } else { 3355f757f3fSDimitry Andric if (QuoteStyle == AtDoubleQuotes) { 3365f757f3fSDimitry Andric LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@"; 3375f757f3fSDimitry Andric Prefix = SignOnNewLine ? "+ @\"" : "@\""; 3385f757f3fSDimitry Andric } else { 3395f757f3fSDimitry Andric LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\""; 3405f757f3fSDimitry Andric Prefix = SignOnNewLine ? "+ \"" : "\""; 3415f757f3fSDimitry Andric } 3425f757f3fSDimitry Andric RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")"; 3435f757f3fSDimitry Andric Postfix = SignOnNewLine ? "\"" : "\" +"; 3445f757f3fSDimitry Andric } 3455f757f3fSDimitry Andric } 3465f757f3fSDimitry Andric 3475f757f3fSDimitry Andric // Following lines are indented by the width of the brace and space if any. 3485f757f3fSDimitry Andric ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0; 3495f757f3fSDimitry Andric // The plus sign may need to be unindented depending on the style. 3505f757f3fSDimitry Andric // FIXME: Add support for DontAlign. 3515f757f3fSDimitry Andric if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus && 3525f757f3fSDimitry Andric Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) { 3535f757f3fSDimitry Andric ContinuationIndent -= 2; 3545f757f3fSDimitry Andric } 3555f757f3fSDimitry Andric } 3565f757f3fSDimitry Andric 3575f757f3fSDimitry Andric unsigned BreakableStringLiteralUsingOperators::getRemainingLength( 3585f757f3fSDimitry Andric unsigned LineIndex, unsigned Offset, unsigned StartColumn) const { 3595f757f3fSDimitry Andric return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) + 3605f757f3fSDimitry Andric encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn, 3615f757f3fSDimitry Andric Style.TabWidth, Encoding); 3625f757f3fSDimitry Andric } 3635f757f3fSDimitry Andric 3645f757f3fSDimitry Andric unsigned 3655f757f3fSDimitry Andric BreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex, 3665f757f3fSDimitry Andric bool Break) const { 3675f757f3fSDimitry Andric return std::max( 3685f757f3fSDimitry Andric 0, 3695f757f3fSDimitry Andric static_cast<int>(StartColumn) + 3705f757f3fSDimitry Andric (Break ? ContinuationIndent + static_cast<int>(Prefix.size()) 3715f757f3fSDimitry Andric : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1 3725f757f3fSDimitry Andric : 0) + 3735f757f3fSDimitry Andric (QuoteStyle == AtDoubleQuotes ? 2 : 1))); 3745f757f3fSDimitry Andric } 3755f757f3fSDimitry Andric 3765f757f3fSDimitry Andric void BreakableStringLiteralUsingOperators::insertBreak( 3775f757f3fSDimitry Andric unsigned LineIndex, unsigned TailOffset, Split Split, 3785f757f3fSDimitry Andric unsigned ContentIndent, WhitespaceManager &Whitespaces) const { 3795f757f3fSDimitry Andric Whitespaces.replaceWhitespaceInToken( 3805f757f3fSDimitry Andric Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset + 3815f757f3fSDimitry Andric Split.first, 3825f757f3fSDimitry Andric /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix, 3835f757f3fSDimitry Andric /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1, 3845f757f3fSDimitry Andric /*Spaces=*/ 3855f757f3fSDimitry Andric std::max(0, static_cast<int>(StartColumn) + ContinuationIndent)); 3865f757f3fSDimitry Andric } 3875f757f3fSDimitry Andric 3885f757f3fSDimitry Andric void BreakableStringLiteralUsingOperators::updateAfterBroken( 3895f757f3fSDimitry Andric WhitespaceManager &Whitespaces) const { 3905f757f3fSDimitry Andric // Add the braces required for breaking the token if they are needed. 3915f757f3fSDimitry Andric if (!BracesNeeded) 3925f757f3fSDimitry Andric return; 3935f757f3fSDimitry Andric 3945f757f3fSDimitry Andric // To add a brace or parenthesis, we replace the quote (or the at sign) with a 3955f757f3fSDimitry Andric // brace and another quote. This is because the rest of the program requires 3965f757f3fSDimitry Andric // one replacement for each source range. If we replace the empty strings 3975f757f3fSDimitry Andric // around the string, it may conflict with whitespace replacements between the 3985f757f3fSDimitry Andric // string and adjacent tokens. 3995f757f3fSDimitry Andric Whitespaces.replaceWhitespaceInToken( 4005f757f3fSDimitry Andric Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"", 4015f757f3fSDimitry Andric /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0, 4025f757f3fSDimitry Andric /*Spaces=*/0); 4035f757f3fSDimitry Andric Whitespaces.replaceWhitespaceInToken( 4045f757f3fSDimitry Andric Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1, 4055f757f3fSDimitry Andric /*PreviousPostfix=*/RightBraceQuote, 4065f757f3fSDimitry Andric /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0); 4075f757f3fSDimitry Andric } 4085f757f3fSDimitry Andric 4090b57cec5SDimitry Andric BreakableComment::BreakableComment(const FormatToken &Token, 4100b57cec5SDimitry Andric unsigned StartColumn, bool InPPDirective, 4110b57cec5SDimitry Andric encoding::Encoding Encoding, 4120b57cec5SDimitry Andric const FormatStyle &Style) 4130b57cec5SDimitry Andric : BreakableToken(Token, InPPDirective, Encoding, Style), 4140b57cec5SDimitry Andric StartColumn(StartColumn) {} 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric unsigned BreakableComment::getLineCount() const { return Lines.size(); } 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric BreakableToken::Split 4190b57cec5SDimitry Andric BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, 4200b57cec5SDimitry Andric unsigned ColumnLimit, unsigned ContentStartColumn, 421480093f4SDimitry Andric const llvm::Regex &CommentPragmasRegex) const { 4220b57cec5SDimitry Andric // Don't break lines matching the comment pragmas regex. 4230b57cec5SDimitry Andric if (CommentPragmasRegex.match(Content[LineIndex])) 4240b57cec5SDimitry Andric return Split(StringRef::npos, 0); 4250b57cec5SDimitry Andric return getCommentSplit(Content[LineIndex].substr(TailOffset), 4260b57cec5SDimitry Andric ContentStartColumn, ColumnLimit, Style.TabWidth, 4270b57cec5SDimitry Andric Encoding, Style); 4280b57cec5SDimitry Andric } 4290b57cec5SDimitry Andric 4300b57cec5SDimitry Andric void BreakableComment::compressWhitespace( 4310b57cec5SDimitry Andric unsigned LineIndex, unsigned TailOffset, Split Split, 4320b57cec5SDimitry Andric WhitespaceManager &Whitespaces) const { 4330b57cec5SDimitry Andric StringRef Text = Content[LineIndex].substr(TailOffset); 4340b57cec5SDimitry Andric // Text is relative to the content line, but Whitespaces operates relative to 4350b57cec5SDimitry Andric // the start of the corresponding token, so compute the start of the Split 4360b57cec5SDimitry Andric // that needs to be compressed into a single space relative to the start of 4370b57cec5SDimitry Andric // its token. 4380b57cec5SDimitry Andric unsigned BreakOffsetInToken = 4390b57cec5SDimitry Andric Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 4400b57cec5SDimitry Andric unsigned CharsToRemove = Split.second; 4410b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken( 4420b57cec5SDimitry Andric tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "", 4430b57cec5SDimitry Andric /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric 4460b57cec5SDimitry Andric const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { 4470b57cec5SDimitry Andric return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; 4480b57cec5SDimitry Andric } 4490b57cec5SDimitry Andric 4500b57cec5SDimitry Andric static bool mayReflowContent(StringRef Content) { 4510b57cec5SDimitry Andric Content = Content.trim(Blanks); 452*0fca6ea1SDimitry Andric // Lines starting with '@' or '\' commonly have special meaning. 4530b57cec5SDimitry Andric // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. 4540b57cec5SDimitry Andric bool hasSpecialMeaningPrefix = false; 4550b57cec5SDimitry Andric for (StringRef Prefix : 456*0fca6ea1SDimitry Andric {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) { 4575f757f3fSDimitry Andric if (Content.starts_with(Prefix)) { 4580b57cec5SDimitry Andric hasSpecialMeaningPrefix = true; 4590b57cec5SDimitry Andric break; 4600b57cec5SDimitry Andric } 4610b57cec5SDimitry Andric } 4620b57cec5SDimitry Andric 4630b57cec5SDimitry Andric // Numbered lists may also start with a number followed by '.' 4640b57cec5SDimitry Andric // To avoid issues if a line starts with a number which is actually the end 4650b57cec5SDimitry Andric // of a previous line, we only consider numbers with up to 2 digits. 466480093f4SDimitry Andric static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. "); 4670b57cec5SDimitry Andric hasSpecialMeaningPrefix = 468480093f4SDimitry Andric hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content); 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric // Simple heuristic for what to reflow: content should contain at least two 4710b57cec5SDimitry Andric // characters and either the first or second character must be 4720b57cec5SDimitry Andric // non-punctuation. 4730b57cec5SDimitry Andric return Content.size() >= 2 && !hasSpecialMeaningPrefix && 4745f757f3fSDimitry Andric !Content.ends_with("\\") && 4750b57cec5SDimitry Andric // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is 4760b57cec5SDimitry Andric // true, then the first code point must be 1 byte long. 4770b57cec5SDimitry Andric (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); 4780b57cec5SDimitry Andric } 4790b57cec5SDimitry Andric 4800b57cec5SDimitry Andric BreakableBlockComment::BreakableBlockComment( 4810b57cec5SDimitry Andric const FormatToken &Token, unsigned StartColumn, 4820b57cec5SDimitry Andric unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 4830b57cec5SDimitry Andric encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF) 4840b57cec5SDimitry Andric : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style), 4850b57cec5SDimitry Andric DelimitersOnNewline(false), 4860b57cec5SDimitry Andric UnbreakableTailLength(Token.UnbreakableTailLength) { 4870b57cec5SDimitry Andric assert(Tok.is(TT_BlockComment) && 4880b57cec5SDimitry Andric "block comment section must start with a block comment"); 4890b57cec5SDimitry Andric 4900b57cec5SDimitry Andric StringRef TokenText(Tok.TokenText); 4915f757f3fSDimitry Andric assert(TokenText.starts_with("/*") && TokenText.ends_with("*/")); 492a7dea167SDimitry Andric TokenText.substr(2, TokenText.size() - 4) 493a7dea167SDimitry Andric .split(Lines, UseCRLF ? "\r\n" : "\n"); 4940b57cec5SDimitry Andric 4950b57cec5SDimitry Andric int IndentDelta = StartColumn - OriginalStartColumn; 4960b57cec5SDimitry Andric Content.resize(Lines.size()); 4970b57cec5SDimitry Andric Content[0] = Lines[0]; 4980b57cec5SDimitry Andric ContentColumn.resize(Lines.size()); 4990b57cec5SDimitry Andric // Account for the initial '/*'. 5000b57cec5SDimitry Andric ContentColumn[0] = StartColumn + 2; 5010b57cec5SDimitry Andric Tokens.resize(Lines.size()); 5020b57cec5SDimitry Andric for (size_t i = 1; i < Lines.size(); ++i) 5030b57cec5SDimitry Andric adjustWhitespace(i, IndentDelta); 5040b57cec5SDimitry Andric 5050b57cec5SDimitry Andric // Align decorations with the column of the star on the first line, 5060b57cec5SDimitry Andric // that is one column after the start "/*". 5070b57cec5SDimitry Andric DecorationColumn = StartColumn + 1; 5080b57cec5SDimitry Andric 5090b57cec5SDimitry Andric // Account for comment decoration patterns like this: 5100b57cec5SDimitry Andric // 5110b57cec5SDimitry Andric // /* 5120b57cec5SDimitry Andric // ** blah blah blah 5130b57cec5SDimitry Andric // */ 5145f757f3fSDimitry Andric if (Lines.size() >= 2 && Content[1].starts_with("**") && 5150b57cec5SDimitry Andric static_cast<unsigned>(ContentColumn[1]) == StartColumn) { 5160b57cec5SDimitry Andric DecorationColumn = StartColumn; 5170b57cec5SDimitry Andric } 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric Decoration = "* "; 5200b57cec5SDimitry Andric if (Lines.size() == 1 && !FirstInLine) { 5210b57cec5SDimitry Andric // Comments for which FirstInLine is false can start on arbitrary column, 5220b57cec5SDimitry Andric // and available horizontal space can be too small to align consecutive 5230b57cec5SDimitry Andric // lines with the first one. 5240b57cec5SDimitry Andric // FIXME: We could, probably, align them to current indentation level, but 5250b57cec5SDimitry Andric // now we just wrap them without stars. 5260b57cec5SDimitry Andric Decoration = ""; 5270b57cec5SDimitry Andric } 52881ad6265SDimitry Andric for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) { 52981ad6265SDimitry Andric const StringRef &Text = Content[i]; 53081ad6265SDimitry Andric if (i + 1 == e) { 5310b57cec5SDimitry Andric // If the last line is empty, the closing "*/" will have a star. 53281ad6265SDimitry Andric if (Text.empty()) 5330b57cec5SDimitry Andric break; 5345f757f3fSDimitry Andric } else if (!Text.empty() && Decoration.starts_with(Text)) { 5350b57cec5SDimitry Andric continue; 53681ad6265SDimitry Andric } 5375f757f3fSDimitry Andric while (!Text.starts_with(Decoration)) 53881ad6265SDimitry Andric Decoration = Decoration.drop_back(1); 5390b57cec5SDimitry Andric } 5400b57cec5SDimitry Andric 5410b57cec5SDimitry Andric LastLineNeedsDecoration = true; 5420b57cec5SDimitry Andric IndentAtLineBreak = ContentColumn[0] + 1; 5430b57cec5SDimitry Andric for (size_t i = 1, e = Lines.size(); i < e; ++i) { 5440b57cec5SDimitry Andric if (Content[i].empty()) { 5450b57cec5SDimitry Andric if (i + 1 == e) { 5460b57cec5SDimitry Andric // Empty last line means that we already have a star as a part of the 5470b57cec5SDimitry Andric // trailing */. We also need to preserve whitespace, so that */ is 5480b57cec5SDimitry Andric // correctly indented. 5490b57cec5SDimitry Andric LastLineNeedsDecoration = false; 5500b57cec5SDimitry Andric // Align the star in the last '*/' with the stars on the previous lines. 55181ad6265SDimitry Andric if (e >= 2 && !Decoration.empty()) 5520b57cec5SDimitry Andric ContentColumn[i] = DecorationColumn; 5530b57cec5SDimitry Andric } else if (Decoration.empty()) { 5540b57cec5SDimitry Andric // For all other lines, set the start column to 0 if they're empty, so 5550b57cec5SDimitry Andric // we do not insert trailing whitespace anywhere. 5560b57cec5SDimitry Andric ContentColumn[i] = 0; 5570b57cec5SDimitry Andric } 5580b57cec5SDimitry Andric continue; 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric 5610b57cec5SDimitry Andric // The first line already excludes the star. 5620b57cec5SDimitry Andric // The last line excludes the star if LastLineNeedsDecoration is false. 5630b57cec5SDimitry Andric // For all other lines, adjust the line to exclude the star and 5640b57cec5SDimitry Andric // (optionally) the first whitespace. 5655f757f3fSDimitry Andric unsigned DecorationSize = Decoration.starts_with(Content[i]) 5660b57cec5SDimitry Andric ? Content[i].size() 5670b57cec5SDimitry Andric : Decoration.size(); 56881ad6265SDimitry Andric if (DecorationSize) 5690b57cec5SDimitry Andric ContentColumn[i] = DecorationColumn + DecorationSize; 5700b57cec5SDimitry Andric Content[i] = Content[i].substr(DecorationSize); 5715f757f3fSDimitry Andric if (!Decoration.starts_with(Content[i])) { 5720b57cec5SDimitry Andric IndentAtLineBreak = 5730b57cec5SDimitry Andric std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i])); 5740b57cec5SDimitry Andric } 57581ad6265SDimitry Andric } 5760b57cec5SDimitry Andric IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 5770b57cec5SDimitry Andric 5780b57cec5SDimitry Andric // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case. 5790eae32dcSDimitry Andric if (Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) { 5805f757f3fSDimitry Andric if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) { 5810b57cec5SDimitry Andric // This is a multiline jsdoc comment. 5820b57cec5SDimitry Andric DelimitersOnNewline = true; 5835f757f3fSDimitry Andric } else if (Lines[0].starts_with("* ") && Lines.size() == 1) { 5840b57cec5SDimitry Andric // Detect a long single-line comment, like: 5850b57cec5SDimitry Andric // /** long long long */ 5860b57cec5SDimitry Andric // Below, '2' is the width of '*/'. 5870b57cec5SDimitry Andric unsigned EndColumn = 5880b57cec5SDimitry Andric ContentColumn[0] + 5890b57cec5SDimitry Andric encoding::columnWidthWithTabs(Lines[0], ContentColumn[0], 5900b57cec5SDimitry Andric Style.TabWidth, Encoding) + 5910b57cec5SDimitry Andric 2; 5920b57cec5SDimitry Andric DelimitersOnNewline = EndColumn > Style.ColumnLimit; 5930b57cec5SDimitry Andric } 5940b57cec5SDimitry Andric } 5950b57cec5SDimitry Andric 5960b57cec5SDimitry Andric LLVM_DEBUG({ 5970b57cec5SDimitry Andric llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 5980b57cec5SDimitry Andric llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n"; 5990b57cec5SDimitry Andric for (size_t i = 0; i < Lines.size(); ++i) { 6000b57cec5SDimitry Andric llvm::dbgs() << i << " |" << Content[i] << "| " 6010b57cec5SDimitry Andric << "CC=" << ContentColumn[i] << "| " 6020b57cec5SDimitry Andric << "IN=" << (Content[i].data() - Lines[i].data()) << "\n"; 6030b57cec5SDimitry Andric } 6040b57cec5SDimitry Andric }); 6050b57cec5SDimitry Andric } 6060b57cec5SDimitry Andric 607a7dea167SDimitry Andric BreakableToken::Split BreakableBlockComment::getSplit( 608a7dea167SDimitry Andric unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, 609480093f4SDimitry Andric unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const { 6100b57cec5SDimitry Andric // Don't break lines matching the comment pragmas regex. 6110b57cec5SDimitry Andric if (CommentPragmasRegex.match(Content[LineIndex])) 6120b57cec5SDimitry Andric return Split(StringRef::npos, 0); 6130b57cec5SDimitry Andric return getCommentSplit(Content[LineIndex].substr(TailOffset), 6140b57cec5SDimitry Andric ContentStartColumn, ColumnLimit, Style.TabWidth, 6155f757f3fSDimitry Andric Encoding, Style, Decoration.ends_with("*")); 6160b57cec5SDimitry Andric } 6170b57cec5SDimitry Andric 6180b57cec5SDimitry Andric void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 6190b57cec5SDimitry Andric int IndentDelta) { 6200b57cec5SDimitry Andric // When in a preprocessor directive, the trailing backslash in a block comment 6210b57cec5SDimitry Andric // is not needed, but can serve a purpose of uniformity with necessary escaped 6220b57cec5SDimitry Andric // newlines outside the comment. In this case we remove it here before 6230b57cec5SDimitry Andric // trimming the trailing whitespace. The backslash will be re-added later when 6240b57cec5SDimitry Andric // inserting a line break. 6250b57cec5SDimitry Andric size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 6265f757f3fSDimitry Andric if (InPPDirective && Lines[LineIndex - 1].ends_with("\\")) 6270b57cec5SDimitry Andric --EndOfPreviousLine; 6280b57cec5SDimitry Andric 6290b57cec5SDimitry Andric // Calculate the end of the non-whitespace text in the previous line. 6300b57cec5SDimitry Andric EndOfPreviousLine = 6310b57cec5SDimitry Andric Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 6320b57cec5SDimitry Andric if (EndOfPreviousLine == StringRef::npos) 6330b57cec5SDimitry Andric EndOfPreviousLine = 0; 6340b57cec5SDimitry Andric else 6350b57cec5SDimitry Andric ++EndOfPreviousLine; 6360b57cec5SDimitry Andric // Calculate the start of the non-whitespace text in the current line. 6370b57cec5SDimitry Andric size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 6380b57cec5SDimitry Andric if (StartOfLine == StringRef::npos) 6390b57cec5SDimitry Andric StartOfLine = Lines[LineIndex].size(); 6400b57cec5SDimitry Andric 6410b57cec5SDimitry Andric StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 6420b57cec5SDimitry Andric // Adjust Lines to only contain relevant text. 6430b57cec5SDimitry Andric size_t PreviousContentOffset = 6440b57cec5SDimitry Andric Content[LineIndex - 1].data() - Lines[LineIndex - 1].data(); 6450b57cec5SDimitry Andric Content[LineIndex - 1] = Lines[LineIndex - 1].substr( 6460b57cec5SDimitry Andric PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset); 6470b57cec5SDimitry Andric Content[LineIndex] = Lines[LineIndex].substr(StartOfLine); 6480b57cec5SDimitry Andric 6490b57cec5SDimitry Andric // Adjust the start column uniformly across all lines. 6500b57cec5SDimitry Andric ContentColumn[LineIndex] = 6510b57cec5SDimitry Andric encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 6520b57cec5SDimitry Andric IndentDelta; 6530b57cec5SDimitry Andric } 6540b57cec5SDimitry Andric 6550b57cec5SDimitry Andric unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex, 6560b57cec5SDimitry Andric unsigned Offset, 6570b57cec5SDimitry Andric StringRef::size_type Length, 6580b57cec5SDimitry Andric unsigned StartColumn) const { 6591fd87a68SDimitry Andric return encoding::columnWidthWithTabs( 6601fd87a68SDimitry Andric Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, 6611fd87a68SDimitry Andric Encoding); 6620b57cec5SDimitry Andric } 6630b57cec5SDimitry Andric 6640b57cec5SDimitry Andric unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex, 6650b57cec5SDimitry Andric unsigned Offset, 6660b57cec5SDimitry Andric unsigned StartColumn) const { 6671fd87a68SDimitry Andric unsigned LineLength = 6681fd87a68SDimitry Andric UnbreakableTailLength + 6690b57cec5SDimitry Andric getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); 6701fd87a68SDimitry Andric if (LineIndex + 1 == Lines.size()) { 6711fd87a68SDimitry Andric LineLength += 2; 6721fd87a68SDimitry Andric // We never need a decoration when breaking just the trailing "*/" postfix. 6731fd87a68SDimitry Andric bool HasRemainingText = Offset < Content[LineIndex].size(); 6741fd87a68SDimitry Andric if (!HasRemainingText) { 6755f757f3fSDimitry Andric bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration); 6761fd87a68SDimitry Andric if (HasDecoration) 6771fd87a68SDimitry Andric LineLength -= Decoration.size(); 6781fd87a68SDimitry Andric } 6791fd87a68SDimitry Andric } 6801fd87a68SDimitry Andric return LineLength; 6810b57cec5SDimitry Andric } 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 6840b57cec5SDimitry Andric bool Break) const { 6850b57cec5SDimitry Andric if (Break) 6860b57cec5SDimitry Andric return IndentAtLineBreak; 6870b57cec5SDimitry Andric return std::max(0, ContentColumn[LineIndex]); 6880b57cec5SDimitry Andric } 6890b57cec5SDimitry Andric 6900b57cec5SDimitry Andric const llvm::StringSet<> 6910b57cec5SDimitry Andric BreakableBlockComment::ContentIndentingJavadocAnnotations = { 6920b57cec5SDimitry Andric "@param", "@return", "@returns", "@throws", "@type", "@template", 6930b57cec5SDimitry Andric "@see", "@deprecated", "@define", "@exports", "@mods", "@private", 6940b57cec5SDimitry Andric }; 6950b57cec5SDimitry Andric 6960b57cec5SDimitry Andric unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const { 6970eae32dcSDimitry Andric if (Style.Language != FormatStyle::LK_Java && !Style.isJavaScript()) 6980b57cec5SDimitry Andric return 0; 6990b57cec5SDimitry Andric // The content at LineIndex 0 of a comment like: 7000b57cec5SDimitry Andric // /** line 0 */ 7010b57cec5SDimitry Andric // is "* line 0", so we need to skip over the decoration in that case. 7020b57cec5SDimitry Andric StringRef ContentWithNoDecoration = Content[LineIndex]; 7035f757f3fSDimitry Andric if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*")) 7040b57cec5SDimitry Andric ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks); 7050b57cec5SDimitry Andric StringRef FirstWord = ContentWithNoDecoration.substr( 7060b57cec5SDimitry Andric 0, ContentWithNoDecoration.find_first_of(Blanks)); 70706c3fb27SDimitry Andric if (ContentIndentingJavadocAnnotations.contains(FirstWord)) 7080b57cec5SDimitry Andric return Style.ContinuationIndentWidth; 7090b57cec5SDimitry Andric return 0; 7100b57cec5SDimitry Andric } 7110b57cec5SDimitry Andric 7120b57cec5SDimitry Andric void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 7130b57cec5SDimitry Andric Split Split, unsigned ContentIndent, 7140b57cec5SDimitry Andric WhitespaceManager &Whitespaces) const { 7150b57cec5SDimitry Andric StringRef Text = Content[LineIndex].substr(TailOffset); 7160b57cec5SDimitry Andric StringRef Prefix = Decoration; 7170b57cec5SDimitry Andric // We need this to account for the case when we have a decoration "* " for all 7180b57cec5SDimitry Andric // the lines except for the last one, where the star in "*/" acts as a 7190b57cec5SDimitry Andric // decoration. 7200b57cec5SDimitry Andric unsigned LocalIndentAtLineBreak = IndentAtLineBreak; 7210b57cec5SDimitry Andric if (LineIndex + 1 == Lines.size() && 7220b57cec5SDimitry Andric Text.size() == Split.first + Split.second) { 7230b57cec5SDimitry Andric // For the last line we need to break before "*/", but not to add "* ". 7240b57cec5SDimitry Andric Prefix = ""; 7250b57cec5SDimitry Andric if (LocalIndentAtLineBreak >= 2) 7260b57cec5SDimitry Andric LocalIndentAtLineBreak -= 2; 7270b57cec5SDimitry Andric } 7280b57cec5SDimitry Andric // The split offset is from the beginning of the line. Convert it to an offset 7290b57cec5SDimitry Andric // from the beginning of the token text. 7300b57cec5SDimitry Andric unsigned BreakOffsetInToken = 7310b57cec5SDimitry Andric Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 7320b57cec5SDimitry Andric unsigned CharsToRemove = Split.second; 7330b57cec5SDimitry Andric assert(LocalIndentAtLineBreak >= Prefix.size()); 7345ffd83dbSDimitry Andric std::string PrefixWithTrailingIndent = std::string(Prefix); 7355ffd83dbSDimitry Andric PrefixWithTrailingIndent.append(ContentIndent, ' '); 7360b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken( 7370b57cec5SDimitry Andric tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", 7380b57cec5SDimitry Andric PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1, 7390b57cec5SDimitry Andric /*Spaces=*/LocalIndentAtLineBreak + ContentIndent - 7400b57cec5SDimitry Andric PrefixWithTrailingIndent.size()); 7410b57cec5SDimitry Andric } 7420b57cec5SDimitry Andric 743480093f4SDimitry Andric BreakableToken::Split BreakableBlockComment::getReflowSplit( 744480093f4SDimitry Andric unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 7450b57cec5SDimitry Andric if (!mayReflow(LineIndex, CommentPragmasRegex)) 7460b57cec5SDimitry Andric return Split(StringRef::npos, 0); 7470b57cec5SDimitry Andric 7480b57cec5SDimitry Andric // If we're reflowing into a line with content indent, only reflow the next 7490b57cec5SDimitry Andric // line if its starting whitespace matches the content indent. 7500b57cec5SDimitry Andric size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); 7510b57cec5SDimitry Andric if (LineIndex) { 7520b57cec5SDimitry Andric unsigned PreviousContentIndent = getContentIndent(LineIndex - 1); 7530b57cec5SDimitry Andric if (PreviousContentIndent && Trimmed != StringRef::npos && 75481ad6265SDimitry Andric Trimmed != PreviousContentIndent) { 7550b57cec5SDimitry Andric return Split(StringRef::npos, 0); 7560b57cec5SDimitry Andric } 75781ad6265SDimitry Andric } 7580b57cec5SDimitry Andric 7590b57cec5SDimitry Andric return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); 7600b57cec5SDimitry Andric } 7610b57cec5SDimitry Andric 7620b57cec5SDimitry Andric bool BreakableBlockComment::introducesBreakBeforeToken() const { 7630b57cec5SDimitry Andric // A break is introduced when we want delimiters on newline. 7640b57cec5SDimitry Andric return DelimitersOnNewline && 7650b57cec5SDimitry Andric Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos; 7660b57cec5SDimitry Andric } 7670b57cec5SDimitry Andric 7680b57cec5SDimitry Andric void BreakableBlockComment::reflow(unsigned LineIndex, 7690b57cec5SDimitry Andric WhitespaceManager &Whitespaces) const { 7700b57cec5SDimitry Andric StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); 7710b57cec5SDimitry Andric // Here we need to reflow. 7720b57cec5SDimitry Andric assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && 7730b57cec5SDimitry Andric "Reflowing whitespace within a token"); 7740b57cec5SDimitry Andric // This is the offset of the end of the last line relative to the start of 7750b57cec5SDimitry Andric // the token text in the token. 7760b57cec5SDimitry Andric unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + 7770b57cec5SDimitry Andric Content[LineIndex - 1].size() - 7780b57cec5SDimitry Andric tokenAt(LineIndex).TokenText.data(); 7790b57cec5SDimitry Andric unsigned WhitespaceLength = TrimmedContent.data() - 7800b57cec5SDimitry Andric tokenAt(LineIndex).TokenText.data() - 7810b57cec5SDimitry Andric WhitespaceOffsetInToken; 7820b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken( 7830b57cec5SDimitry Andric tokenAt(LineIndex), WhitespaceOffsetInToken, 7840b57cec5SDimitry Andric /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", 7850b57cec5SDimitry Andric /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, 7860b57cec5SDimitry Andric /*Spaces=*/0); 7870b57cec5SDimitry Andric } 7880b57cec5SDimitry Andric 7890b57cec5SDimitry Andric void BreakableBlockComment::adaptStartOfLine( 7900b57cec5SDimitry Andric unsigned LineIndex, WhitespaceManager &Whitespaces) const { 7910b57cec5SDimitry Andric if (LineIndex == 0) { 7920b57cec5SDimitry Andric if (DelimitersOnNewline) { 7930b57cec5SDimitry Andric // Since we're breaking at index 1 below, the break position and the 7940b57cec5SDimitry Andric // break length are the same. 7950b57cec5SDimitry Andric // Note: this works because getCommentSplit is careful never to split at 7960b57cec5SDimitry Andric // the beginning of a line. 7970b57cec5SDimitry Andric size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); 79881ad6265SDimitry Andric if (BreakLength != StringRef::npos) { 7990b57cec5SDimitry Andric insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0, 8000b57cec5SDimitry Andric Whitespaces); 8010b57cec5SDimitry Andric } 80281ad6265SDimitry Andric } 8030b57cec5SDimitry Andric return; 8040b57cec5SDimitry Andric } 8050b57cec5SDimitry Andric // Here no reflow with the previous line will happen. 8060b57cec5SDimitry Andric // Fix the decoration of the line at LineIndex. 8070b57cec5SDimitry Andric StringRef Prefix = Decoration; 8080b57cec5SDimitry Andric if (Content[LineIndex].empty()) { 8090b57cec5SDimitry Andric if (LineIndex + 1 == Lines.size()) { 8100b57cec5SDimitry Andric if (!LastLineNeedsDecoration) { 8110b57cec5SDimitry Andric // If the last line was empty, we don't need a prefix, as the */ will 8120b57cec5SDimitry Andric // line up with the decoration (if it exists). 8130b57cec5SDimitry Andric Prefix = ""; 8140b57cec5SDimitry Andric } 8150b57cec5SDimitry Andric } else if (!Decoration.empty()) { 8160b57cec5SDimitry Andric // For other empty lines, if we do have a decoration, adapt it to not 8170b57cec5SDimitry Andric // contain a trailing whitespace. 8180b57cec5SDimitry Andric Prefix = Prefix.substr(0, 1); 8190b57cec5SDimitry Andric } 82081ad6265SDimitry Andric } else if (ContentColumn[LineIndex] == 1) { 8210b57cec5SDimitry Andric // This line starts immediately after the decorating *. 8220b57cec5SDimitry Andric Prefix = Prefix.substr(0, 1); 8230b57cec5SDimitry Andric } 8240b57cec5SDimitry Andric // This is the offset of the end of the last line relative to the start of the 8250b57cec5SDimitry Andric // token text in the token. 8260b57cec5SDimitry Andric unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + 8270b57cec5SDimitry Andric Content[LineIndex - 1].size() - 8280b57cec5SDimitry Andric tokenAt(LineIndex).TokenText.data(); 8290b57cec5SDimitry Andric unsigned WhitespaceLength = Content[LineIndex].data() - 8300b57cec5SDimitry Andric tokenAt(LineIndex).TokenText.data() - 8310b57cec5SDimitry Andric WhitespaceOffsetInToken; 8320b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken( 8330b57cec5SDimitry Andric tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix, 8340b57cec5SDimitry Andric InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric BreakableToken::Split 8380b57cec5SDimitry Andric BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const { 8390b57cec5SDimitry Andric if (DelimitersOnNewline) { 8400b57cec5SDimitry Andric // Replace the trailing whitespace of the last line with a newline. 8410b57cec5SDimitry Andric // In case the last line is empty, the ending '*/' is already on its own 8420b57cec5SDimitry Andric // line. 8430b57cec5SDimitry Andric StringRef Line = Content.back().substr(TailOffset); 8440b57cec5SDimitry Andric StringRef TrimmedLine = Line.rtrim(Blanks); 8450b57cec5SDimitry Andric if (!TrimmedLine.empty()) 8460b57cec5SDimitry Andric return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size()); 8470b57cec5SDimitry Andric } 8480b57cec5SDimitry Andric return Split(StringRef::npos, 0); 8490b57cec5SDimitry Andric } 8500b57cec5SDimitry Andric 851480093f4SDimitry Andric bool BreakableBlockComment::mayReflow( 852480093f4SDimitry Andric unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 8530b57cec5SDimitry Andric // Content[LineIndex] may exclude the indent after the '*' decoration. In that 8540b57cec5SDimitry Andric // case, we compute the start of the comment pragma manually. 8550b57cec5SDimitry Andric StringRef IndentContent = Content[LineIndex]; 8565f757f3fSDimitry Andric if (Lines[LineIndex].ltrim(Blanks).starts_with("*")) 8570b57cec5SDimitry Andric IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1); 8580b57cec5SDimitry Andric return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && 8590b57cec5SDimitry Andric mayReflowContent(Content[LineIndex]) && !Tok.Finalized && 8600b57cec5SDimitry Andric !switchesFormatting(tokenAt(LineIndex)); 8610b57cec5SDimitry Andric } 8620b57cec5SDimitry Andric 8630b57cec5SDimitry Andric BreakableLineCommentSection::BreakableLineCommentSection( 864e8d8bef9SDimitry Andric const FormatToken &Token, unsigned StartColumn, bool InPPDirective, 8650b57cec5SDimitry Andric encoding::Encoding Encoding, const FormatStyle &Style) 8660b57cec5SDimitry Andric : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { 8670b57cec5SDimitry Andric assert(Tok.is(TT_LineComment) && 8680b57cec5SDimitry Andric "line comment section must start with a line comment"); 8690b57cec5SDimitry Andric FormatToken *LineTok = nullptr; 87056f451bbSDimitry Andric const int Minimum = Style.SpacesInLineCommentPrefix.Minimum; 871fe6060f1SDimitry Andric // How many spaces we changed in the first line of the section, this will be 872fe6060f1SDimitry Andric // applied in all following lines 873fe6060f1SDimitry Andric int FirstLineSpaceChange = 0; 8740b57cec5SDimitry Andric for (const FormatToken *CurrentTok = &Tok; 8750b57cec5SDimitry Andric CurrentTok && CurrentTok->is(TT_LineComment); 8760b57cec5SDimitry Andric CurrentTok = CurrentTok->Next) { 8770b57cec5SDimitry Andric LastLineTok = LineTok; 8780b57cec5SDimitry Andric StringRef TokenText(CurrentTok->TokenText); 8795f757f3fSDimitry Andric assert((TokenText.starts_with("//") || TokenText.starts_with("#")) && 8800b57cec5SDimitry Andric "unsupported line comment prefix, '//' and '#' are supported"); 8810b57cec5SDimitry Andric size_t FirstLineIndex = Lines.size(); 8820b57cec5SDimitry Andric TokenText.split(Lines, "\n"); 8830b57cec5SDimitry Andric Content.resize(Lines.size()); 8840b57cec5SDimitry Andric ContentColumn.resize(Lines.size()); 885fe6060f1SDimitry Andric PrefixSpaceChange.resize(Lines.size()); 8860b57cec5SDimitry Andric Tokens.resize(Lines.size()); 8870b57cec5SDimitry Andric Prefix.resize(Lines.size()); 8880b57cec5SDimitry Andric OriginalPrefix.resize(Lines.size()); 8890b57cec5SDimitry Andric for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { 8900b57cec5SDimitry Andric Lines[i] = Lines[i].ltrim(Blanks); 891e8d8bef9SDimitry Andric StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style); 892fe6060f1SDimitry Andric OriginalPrefix[i] = IndentPrefix; 89356f451bbSDimitry Andric const int SpacesInPrefix = llvm::count(IndentPrefix, ' '); 894fe6060f1SDimitry Andric 89581ad6265SDimitry Andric // This lambda also considers multibyte character that is not handled in 89681ad6265SDimitry Andric // functions like isPunctuation provided by CharInfo. 89781ad6265SDimitry Andric const auto NoSpaceBeforeFirstCommentChar = [&]() { 89881ad6265SDimitry Andric assert(Lines[i].size() > IndentPrefix.size()); 89981ad6265SDimitry Andric const char FirstCommentChar = Lines[i][IndentPrefix.size()]; 90081ad6265SDimitry Andric const unsigned FirstCharByteSize = 90181ad6265SDimitry Andric encoding::getCodePointNumBytes(FirstCommentChar, Encoding); 90281ad6265SDimitry Andric if (encoding::columnWidth( 90381ad6265SDimitry Andric Lines[i].substr(IndentPrefix.size(), FirstCharByteSize), 90481ad6265SDimitry Andric Encoding) != 1) { 90581ad6265SDimitry Andric return false; 90681ad6265SDimitry Andric } 90781ad6265SDimitry Andric // In C-like comments, add a space before #. For example this is useful 90881ad6265SDimitry Andric // to preserve the relative indentation when commenting out code with 90981ad6265SDimitry Andric // #includes. 91081ad6265SDimitry Andric // 91181ad6265SDimitry Andric // In languages using # as the comment leader such as proto, don't 91281ad6265SDimitry Andric // add a space to support patterns like: 91381ad6265SDimitry Andric // ######### 91481ad6265SDimitry Andric // # section 91581ad6265SDimitry Andric // ######### 9165f757f3fSDimitry Andric if (FirstCommentChar == '#' && !TokenText.starts_with("#")) 91781ad6265SDimitry Andric return false; 91881ad6265SDimitry Andric return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) || 91981ad6265SDimitry Andric isHorizontalWhitespace(FirstCommentChar); 92081ad6265SDimitry Andric }; 92181ad6265SDimitry Andric 922fe6060f1SDimitry Andric // On the first line of the comment section we calculate how many spaces 923fe6060f1SDimitry Andric // are to be added or removed, all lines after that just get only the 924fe6060f1SDimitry Andric // change and we will not look at the maximum anymore. Additionally to the 925fe6060f1SDimitry Andric // actual first line, we calculate that when the non space Prefix changes, 926fe6060f1SDimitry Andric // e.g. from "///" to "//". 927fe6060f1SDimitry Andric if (i == 0 || OriginalPrefix[i].rtrim(Blanks) != 928fe6060f1SDimitry Andric OriginalPrefix[i - 1].rtrim(Blanks)) { 92956f451bbSDimitry Andric if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() && 93081ad6265SDimitry Andric !NoSpaceBeforeFirstCommentChar()) { 93156f451bbSDimitry Andric FirstLineSpaceChange = Minimum - SpacesInPrefix; 93256f451bbSDimitry Andric } else if (static_cast<unsigned>(SpacesInPrefix) > 93356f451bbSDimitry Andric Style.SpacesInLineCommentPrefix.Maximum) { 934fe6060f1SDimitry Andric FirstLineSpaceChange = 935fe6060f1SDimitry Andric Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix; 936fe6060f1SDimitry Andric } else { 937fe6060f1SDimitry Andric FirstLineSpaceChange = 0; 938fe6060f1SDimitry Andric } 939fe6060f1SDimitry Andric } 940fe6060f1SDimitry Andric 941fe6060f1SDimitry Andric if (Lines[i].size() != IndentPrefix.size()) { 942fe6060f1SDimitry Andric PrefixSpaceChange[i] = FirstLineSpaceChange; 943fe6060f1SDimitry Andric 94456f451bbSDimitry Andric if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) { 94556f451bbSDimitry Andric PrefixSpaceChange[i] += 94656f451bbSDimitry Andric Minimum - (SpacesInPrefix + PrefixSpaceChange[i]); 947fe6060f1SDimitry Andric } 948fe6060f1SDimitry Andric 949fe6060f1SDimitry Andric assert(Lines[i].size() > IndentPrefix.size()); 950fe6060f1SDimitry Andric const auto FirstNonSpace = Lines[i][IndentPrefix.size()]; 95181ad6265SDimitry Andric const bool IsFormatComment = LineTok && switchesFormatting(*LineTok); 95281ad6265SDimitry Andric const bool LineRequiresLeadingSpace = 95381ad6265SDimitry Andric !NoSpaceBeforeFirstCommentChar() || 95481ad6265SDimitry Andric (FirstNonSpace == '}' && FirstLineSpaceChange != 0); 95581ad6265SDimitry Andric const bool AllowsSpaceChange = 95681ad6265SDimitry Andric !IsFormatComment && 95781ad6265SDimitry Andric (SpacesInPrefix != 0 || LineRequiresLeadingSpace); 958fe6060f1SDimitry Andric 959fe6060f1SDimitry Andric if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) { 960fe6060f1SDimitry Andric Prefix[i] = IndentPrefix.str(); 961fe6060f1SDimitry Andric Prefix[i].append(PrefixSpaceChange[i], ' '); 962fe6060f1SDimitry Andric } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) { 963fe6060f1SDimitry Andric Prefix[i] = IndentPrefix 964fe6060f1SDimitry Andric .drop_back(std::min<std::size_t>( 965fe6060f1SDimitry Andric -PrefixSpaceChange[i], SpacesInPrefix)) 966fe6060f1SDimitry Andric .str(); 967fe6060f1SDimitry Andric } else { 968fe6060f1SDimitry Andric Prefix[i] = IndentPrefix.str(); 969fe6060f1SDimitry Andric } 970fe6060f1SDimitry Andric } else { 971fe6060f1SDimitry Andric // If the IndentPrefix is the whole line, there is no content and we 972fe6060f1SDimitry Andric // drop just all space 973fe6060f1SDimitry Andric Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str(); 9740b57cec5SDimitry Andric } 9750b57cec5SDimitry Andric 9760b57cec5SDimitry Andric Tokens[i] = LineTok; 9770b57cec5SDimitry Andric Content[i] = Lines[i].substr(IndentPrefix.size()); 9780b57cec5SDimitry Andric ContentColumn[i] = 9790b57cec5SDimitry Andric StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn, 9800b57cec5SDimitry Andric Style.TabWidth, Encoding); 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric // Calculate the end of the non-whitespace text in this line. 9830b57cec5SDimitry Andric size_t EndOfLine = Content[i].find_last_not_of(Blanks); 9840b57cec5SDimitry Andric if (EndOfLine == StringRef::npos) 9850b57cec5SDimitry Andric EndOfLine = Content[i].size(); 9860b57cec5SDimitry Andric else 9870b57cec5SDimitry Andric ++EndOfLine; 9880b57cec5SDimitry Andric Content[i] = Content[i].substr(0, EndOfLine); 9890b57cec5SDimitry Andric } 9900b57cec5SDimitry Andric LineTok = CurrentTok->Next; 9910b57cec5SDimitry Andric if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) { 9920b57cec5SDimitry Andric // A line comment section needs to broken by a line comment that is 9930b57cec5SDimitry Andric // preceded by at least two newlines. Note that we put this break here 9940b57cec5SDimitry Andric // instead of breaking at a previous stage during parsing, since that 9950b57cec5SDimitry Andric // would split the contents of the enum into two unwrapped lines in this 9960b57cec5SDimitry Andric // example, which is undesirable: 9970b57cec5SDimitry Andric // enum A { 9980b57cec5SDimitry Andric // a, // comment about a 9990b57cec5SDimitry Andric // 10000b57cec5SDimitry Andric // // comment about b 10010b57cec5SDimitry Andric // b 10020b57cec5SDimitry Andric // }; 10030b57cec5SDimitry Andric // 10040b57cec5SDimitry Andric // FIXME: Consider putting separate line comment sections as children to 10050b57cec5SDimitry Andric // the unwrapped line instead. 10060b57cec5SDimitry Andric break; 10070b57cec5SDimitry Andric } 10080b57cec5SDimitry Andric } 10090b57cec5SDimitry Andric } 10100b57cec5SDimitry Andric 10110b57cec5SDimitry Andric unsigned 10120b57cec5SDimitry Andric BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset, 10130b57cec5SDimitry Andric StringRef::size_type Length, 10140b57cec5SDimitry Andric unsigned StartColumn) const { 10150b57cec5SDimitry Andric return encoding::columnWidthWithTabs( 10160b57cec5SDimitry Andric Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, 10170b57cec5SDimitry Andric Encoding); 10180b57cec5SDimitry Andric } 10190b57cec5SDimitry Andric 1020fe6060f1SDimitry Andric unsigned 1021fe6060f1SDimitry Andric BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, 1022fe6060f1SDimitry Andric bool /*Break*/) const { 10230b57cec5SDimitry Andric return ContentColumn[LineIndex]; 10240b57cec5SDimitry Andric } 10250b57cec5SDimitry Andric 10260b57cec5SDimitry Andric void BreakableLineCommentSection::insertBreak( 10270b57cec5SDimitry Andric unsigned LineIndex, unsigned TailOffset, Split Split, 10280b57cec5SDimitry Andric unsigned ContentIndent, WhitespaceManager &Whitespaces) const { 10290b57cec5SDimitry Andric StringRef Text = Content[LineIndex].substr(TailOffset); 10300b57cec5SDimitry Andric // Compute the offset of the split relative to the beginning of the token 10310b57cec5SDimitry Andric // text. 10320b57cec5SDimitry Andric unsigned BreakOffsetInToken = 10330b57cec5SDimitry Andric Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 10340b57cec5SDimitry Andric unsigned CharsToRemove = Split.second; 10350b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken( 10360b57cec5SDimitry Andric tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", 10370b57cec5SDimitry Andric Prefix[LineIndex], InPPDirective, /*Newlines=*/1, 1038fe6060f1SDimitry Andric /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size()); 10390b57cec5SDimitry Andric } 10400b57cec5SDimitry Andric 10410b57cec5SDimitry Andric BreakableComment::Split BreakableLineCommentSection::getReflowSplit( 1042480093f4SDimitry Andric unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 10430b57cec5SDimitry Andric if (!mayReflow(LineIndex, CommentPragmasRegex)) 10440b57cec5SDimitry Andric return Split(StringRef::npos, 0); 10450b57cec5SDimitry Andric 10460b57cec5SDimitry Andric size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); 10470b57cec5SDimitry Andric 10480b57cec5SDimitry Andric // In a line comment section each line is a separate token; thus, after a 10490b57cec5SDimitry Andric // split we replace all whitespace before the current line comment token 10500b57cec5SDimitry Andric // (which does not need to be included in the split), plus the start of the 10510b57cec5SDimitry Andric // line up to where the content starts. 10520b57cec5SDimitry Andric return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); 10530b57cec5SDimitry Andric } 10540b57cec5SDimitry Andric 10550b57cec5SDimitry Andric void BreakableLineCommentSection::reflow(unsigned LineIndex, 10560b57cec5SDimitry Andric WhitespaceManager &Whitespaces) const { 10570b57cec5SDimitry Andric if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { 10580b57cec5SDimitry Andric // Reflow happens between tokens. Replace the whitespace between the 10590b57cec5SDimitry Andric // tokens by the empty string. 10600b57cec5SDimitry Andric Whitespaces.replaceWhitespace( 10610b57cec5SDimitry Andric *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, 10625ffd83dbSDimitry Andric /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true, 10635ffd83dbSDimitry Andric /*InPPDirective=*/false); 10640b57cec5SDimitry Andric } else if (LineIndex > 0) { 10650b57cec5SDimitry Andric // In case we're reflowing after the '\' in: 10660b57cec5SDimitry Andric // 10670b57cec5SDimitry Andric // // line comment \ 10680b57cec5SDimitry Andric // // line 2 10690b57cec5SDimitry Andric // 10700b57cec5SDimitry Andric // the reflow happens inside the single comment token (it is a single line 10710b57cec5SDimitry Andric // comment with an unescaped newline). 10720b57cec5SDimitry Andric // Replace the whitespace between the '\' and '//' with the empty string. 10730b57cec5SDimitry Andric // 10740b57cec5SDimitry Andric // Offset points to after the '\' relative to start of the token. 10750b57cec5SDimitry Andric unsigned Offset = Lines[LineIndex - 1].data() + 10760b57cec5SDimitry Andric Lines[LineIndex - 1].size() - 10770b57cec5SDimitry Andric tokenAt(LineIndex - 1).TokenText.data(); 10780b57cec5SDimitry Andric // WhitespaceLength is the number of chars between the '\' and the '//' on 10790b57cec5SDimitry Andric // the next line. 10800b57cec5SDimitry Andric unsigned WhitespaceLength = 10810b57cec5SDimitry Andric Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset; 10820b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset, 10830b57cec5SDimitry Andric /*ReplaceChars=*/WhitespaceLength, 10840b57cec5SDimitry Andric /*PreviousPostfix=*/"", 10850b57cec5SDimitry Andric /*CurrentPrefix=*/"", 10860b57cec5SDimitry Andric /*InPPDirective=*/false, 10870b57cec5SDimitry Andric /*Newlines=*/0, 10880b57cec5SDimitry Andric /*Spaces=*/0); 10890b57cec5SDimitry Andric } 10900b57cec5SDimitry Andric // Replace the indent and prefix of the token with the reflow prefix. 10910b57cec5SDimitry Andric unsigned Offset = 10920b57cec5SDimitry Andric Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); 10930b57cec5SDimitry Andric unsigned WhitespaceLength = 10940b57cec5SDimitry Andric Content[LineIndex].data() - Lines[LineIndex].data(); 10950b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset, 10960b57cec5SDimitry Andric /*ReplaceChars=*/WhitespaceLength, 10970b57cec5SDimitry Andric /*PreviousPostfix=*/"", 10980b57cec5SDimitry Andric /*CurrentPrefix=*/ReflowPrefix, 10990b57cec5SDimitry Andric /*InPPDirective=*/false, 11000b57cec5SDimitry Andric /*Newlines=*/0, 11010b57cec5SDimitry Andric /*Spaces=*/0); 11020b57cec5SDimitry Andric } 11030b57cec5SDimitry Andric 11040b57cec5SDimitry Andric void BreakableLineCommentSection::adaptStartOfLine( 11050b57cec5SDimitry Andric unsigned LineIndex, WhitespaceManager &Whitespaces) const { 11060b57cec5SDimitry Andric // If this is the first line of a token, we need to inform Whitespace Manager 11070b57cec5SDimitry Andric // about it: either adapt the whitespace range preceding it, or mark it as an 11080b57cec5SDimitry Andric // untouchable token. 11090b57cec5SDimitry Andric // This happens for instance here: 11100b57cec5SDimitry Andric // // line 1 \ 11110b57cec5SDimitry Andric // // line 2 11120b57cec5SDimitry Andric if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { 11130b57cec5SDimitry Andric // This is the first line for the current token, but no reflow with the 11140b57cec5SDimitry Andric // previous token is necessary. However, we still may need to adjust the 11150b57cec5SDimitry Andric // start column. Note that ContentColumn[LineIndex] is the expected 11160b57cec5SDimitry Andric // content column after a possible update to the prefix, hence the prefix 11170b57cec5SDimitry Andric // length change is included. 11180b57cec5SDimitry Andric unsigned LineColumn = 11190b57cec5SDimitry Andric ContentColumn[LineIndex] - 11200b57cec5SDimitry Andric (Content[LineIndex].data() - Lines[LineIndex].data()) + 11210b57cec5SDimitry Andric (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); 11220b57cec5SDimitry Andric 11230b57cec5SDimitry Andric // We always want to create a replacement instead of adding an untouchable 11240b57cec5SDimitry Andric // token, even if LineColumn is the same as the original column of the 11250b57cec5SDimitry Andric // token. This is because WhitespaceManager doesn't align trailing 11260b57cec5SDimitry Andric // comments if they are untouchable. 11270b57cec5SDimitry Andric Whitespaces.replaceWhitespace(*Tokens[LineIndex], 11280b57cec5SDimitry Andric /*Newlines=*/1, 11290b57cec5SDimitry Andric /*Spaces=*/LineColumn, 11300b57cec5SDimitry Andric /*StartOfTokenColumn=*/LineColumn, 11315ffd83dbSDimitry Andric /*IsAligned=*/true, 11320b57cec5SDimitry Andric /*InPPDirective=*/false); 11330b57cec5SDimitry Andric } 11340b57cec5SDimitry Andric if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { 11350b57cec5SDimitry Andric // Adjust the prefix if necessary. 1136fe6060f1SDimitry Andric const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0); 1137fe6060f1SDimitry Andric const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0); 11380b57cec5SDimitry Andric Whitespaces.replaceWhitespaceInToken( 1139fe6060f1SDimitry Andric tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove, 1140fe6060f1SDimitry Andric /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false, 1141fe6060f1SDimitry Andric /*Newlines=*/0, /*Spaces=*/SpacesToAdd); 11420b57cec5SDimitry Andric } 11430b57cec5SDimitry Andric } 11440b57cec5SDimitry Andric 11450b57cec5SDimitry Andric void BreakableLineCommentSection::updateNextToken(LineState &State) const { 114681ad6265SDimitry Andric if (LastLineTok) 11470b57cec5SDimitry Andric State.NextToken = LastLineTok->Next; 11480b57cec5SDimitry Andric } 11490b57cec5SDimitry Andric 11500b57cec5SDimitry Andric bool BreakableLineCommentSection::mayReflow( 1151480093f4SDimitry Andric unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 11520b57cec5SDimitry Andric // Line comments have the indent as part of the prefix, so we need to 11530b57cec5SDimitry Andric // recompute the start of the line. 11540b57cec5SDimitry Andric StringRef IndentContent = Content[LineIndex]; 11555f757f3fSDimitry Andric if (Lines[LineIndex].starts_with("//")) 11560b57cec5SDimitry Andric IndentContent = Lines[LineIndex].substr(2); 11570b57cec5SDimitry Andric // FIXME: Decide whether we want to reflow non-regular indents: 11580b57cec5SDimitry Andric // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the 11590b57cec5SDimitry Andric // OriginalPrefix[LineIndex-1]. That means we don't reflow 11600b57cec5SDimitry Andric // // text that protrudes 11610b57cec5SDimitry Andric // // into text with different indent 11620b57cec5SDimitry Andric // We do reflow in that case in block comments. 11630b57cec5SDimitry Andric return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && 11640b57cec5SDimitry Andric mayReflowContent(Content[LineIndex]) && !Tok.Finalized && 11650b57cec5SDimitry Andric !switchesFormatting(tokenAt(LineIndex)) && 11660b57cec5SDimitry Andric OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; 11670b57cec5SDimitry Andric } 11680b57cec5SDimitry Andric 11690b57cec5SDimitry Andric } // namespace format 11700b57cec5SDimitry Andric } // namespace clang 1171