lib/Format/BreakableToken.cpp

//===--- BreakableToken.cpp - Format C++ code -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Contains implementation of BreakableToken class and classes derived
/// from it.
///
//===----------------------------------------------------------------------===//
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric#include "BreakableToken.h"
0b57cec5SDimitry Andric#include "ContinuationIndenter.h"
0b57cec5SDimitry Andric#include "clang/Basic/CharInfo.h"
0b57cec5SDimitry Andric#include "clang/Format/Format.h"
0b57cec5SDimitry Andric#include "llvm/ADT/STLExtras.h"
0b57cec5SDimitry Andric#include "llvm/Support/Debug.h"
0b57cec5SDimitry Andric#include <algorithm>
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric#define DEBUG_TYPE "format-token-breaker"
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricnamespace clang {
0b57cec5SDimitry Andricnamespace format {
0b57cec5SDimitry Andric
e8d8bef9SDimitry Andricstatic constexpr StringRef Blanks = " \t\v\f\r";
// Returns true if C is a blank character: space, horizontal tab, vertical tab,
// form feed or carriage return. A line feed ('\n') is not a blank.
static bool IsBlank(char C) {
  switch (C) {
  case ' ':
  case '\t':
  case '\v':
  case '\f':
  case '\r':
    return true;
  default:
    return false;
  }
}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricstatic StringRef getLineCommentIndentPrefix(StringRef Comment,
0b57cec5SDimitry Andric                                            const FormatStyle &Style) {
e8d8bef9SDimitry Andric  static constexpr StringRef KnownCStylePrefixes[] = {"///<", "//!<", "///",
e8d8bef9SDimitry Andric                                                      "//!",  "//:",  "//"};
e8d8bef9SDimitry Andric  static constexpr StringRef KnownTextProtoPrefixes[] = {"####", "###", "##",
e8d8bef9SDimitry Andric                                                         "//", "#"};
e8d8bef9SDimitry Andric  ArrayRef<StringRef> KnownPrefixes(KnownCStylePrefixes);
0b57cec5SDimitry Andric  if (Style.Language == FormatStyle::LK_TextProto)
0b57cec5SDimitry Andric    KnownPrefixes = KnownTextProtoPrefixes;
0b57cec5SDimitry Andric
bdd1243dSDimitry Andric  assert(
bdd1243dSDimitry Andric      llvm::is_sorted(KnownPrefixes, [](StringRef Lhs, StringRef Rhs) noexcept {
e8d8bef9SDimitry Andric        return Lhs.size() > Rhs.size();
e8d8bef9SDimitry Andric      }));
e8d8bef9SDimitry Andric
0b57cec5SDimitry Andric  for (StringRef KnownPrefix : KnownPrefixes) {
5f757f3fSDimitry Andric    if (Comment.starts_with(KnownPrefix)) {
e8d8bef9SDimitry Andric      const auto PrefixLength =
e8d8bef9SDimitry Andric          Comment.find_first_not_of(' ', KnownPrefix.size());
e8d8bef9SDimitry Andric      return Comment.substr(0, PrefixLength);
0b57cec5SDimitry Andric    }
0b57cec5SDimitry Andric  }
e8d8bef9SDimitry Andric  return {};
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricstatic BreakableToken::Split
0b57cec5SDimitry AndricgetCommentSplit(StringRef Text, unsigned ContentStartColumn,
0b57cec5SDimitry Andric                unsigned ColumnLimit, unsigned TabWidth,
0b57cec5SDimitry Andric                encoding::Encoding Encoding, const FormatStyle &Style,
0b57cec5SDimitry Andric                bool DecorationEndsWithStar = false) {
0b57cec5SDimitry Andric  LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
0b57cec5SDimitry Andric                          << "\", Column limit: " << ColumnLimit
0b57cec5SDimitry Andric                          << ", Content start: " << ContentStartColumn << "\n");
0b57cec5SDimitry Andric  if (ColumnLimit <= ContentStartColumn + 1)
0b57cec5SDimitry Andric    return BreakableToken::Split(StringRef::npos, 0);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
0b57cec5SDimitry Andric  unsigned MaxSplitBytes = 0;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  for (unsigned NumChars = 0;
0b57cec5SDimitry Andric       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
0b57cec5SDimitry Andric    unsigned BytesInChar =
0b57cec5SDimitry Andric        encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
06c3fb27SDimitry Andric    NumChars += encoding::columnWidthWithTabs(
06c3fb27SDimitry Andric        Text.substr(MaxSplitBytes, BytesInChar), ContentStartColumn + NumChars,
06c3fb27SDimitry Andric        TabWidth, Encoding);
0b57cec5SDimitry Andric    MaxSplitBytes += BytesInChar;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // In JavaScript, some @tags can be followed by {, and machinery that parses
0b57cec5SDimitry Andric  // these comments will fail to understand the comment if followed by a line
0b57cec5SDimitry Andric  // break. So avoid ever breaking before a {.
0eae32dcSDimitry Andric  if (Style.isJavaScript()) {
e8d8bef9SDimitry Andric    StringRef::size_type SpaceOffset =
e8d8bef9SDimitry Andric        Text.find_first_of(Blanks, MaxSplitBytes);
e8d8bef9SDimitry Andric    if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() &&
e8d8bef9SDimitry Andric        Text[SpaceOffset + 1] == '{') {
e8d8bef9SDimitry Andric      MaxSplitBytes = SpaceOffset + 1;
e8d8bef9SDimitry Andric    }
e8d8bef9SDimitry Andric  }
e8d8bef9SDimitry Andric
e8d8bef9SDimitry Andric  StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
e8d8bef9SDimitry Andric
e8d8bef9SDimitry Andric  static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\.");
e8d8bef9SDimitry Andric  // Some spaces are unacceptable to break on, rewind past them.
e8d8bef9SDimitry Andric  while (SpaceOffset != StringRef::npos) {
e8d8bef9SDimitry Andric    // If a line-comment ends with `\`, the next line continues the comment,
e8d8bef9SDimitry Andric    // whether or not it starts with `//`. This is confusing and triggers
e8d8bef9SDimitry Andric    // -Wcomment.
e8d8bef9SDimitry Andric    // Avoid introducing multiline comments by not allowing a break right
e8d8bef9SDimitry Andric    // after '\'.
e8d8bef9SDimitry Andric    if (Style.isCpp()) {
e8d8bef9SDimitry Andric      StringRef::size_type LastNonBlank =
e8d8bef9SDimitry Andric          Text.find_last_not_of(Blanks, SpaceOffset);
e8d8bef9SDimitry Andric      if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') {
e8d8bef9SDimitry Andric        SpaceOffset = Text.find_last_of(Blanks, LastNonBlank);
e8d8bef9SDimitry Andric        continue;
e8d8bef9SDimitry Andric      }
e8d8bef9SDimitry Andric    }
e8d8bef9SDimitry Andric
e8d8bef9SDimitry Andric    // Do not split before a number followed by a dot: this would be interpreted
e8d8bef9SDimitry Andric    // as a numbered list, which would prevent re-flowing in subsequent passes.
e8d8bef9SDimitry Andric    if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) {
0b57cec5SDimitry Andric      SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
e8d8bef9SDimitry Andric      continue;
e8d8bef9SDimitry Andric    }
e8d8bef9SDimitry Andric
e8d8bef9SDimitry Andric    // Avoid ever breaking before a @tag or a { in JavaScript.
0eae32dcSDimitry Andric    if (Style.isJavaScript() && SpaceOffset + 1 < Text.size() &&
e8d8bef9SDimitry Andric        (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) {
e8d8bef9SDimitry Andric      SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
e8d8bef9SDimitry Andric      continue;
e8d8bef9SDimitry Andric    }
e8d8bef9SDimitry Andric
0b57cec5SDimitry Andric    break;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  if (SpaceOffset == StringRef::npos ||
0b57cec5SDimitry Andric      // Don't break at leading whitespace.
0b57cec5SDimitry Andric      Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
0b57cec5SDimitry Andric    // Make sure that we don't break at leading whitespace that
0b57cec5SDimitry Andric    // reaches past MaxSplit.
0b57cec5SDimitry Andric    StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
81ad6265SDimitry Andric    if (FirstNonWhitespace == StringRef::npos) {
0b57cec5SDimitry Andric      // If the comment is only whitespace, we cannot split.
0b57cec5SDimitry Andric      return BreakableToken::Split(StringRef::npos, 0);
81ad6265SDimitry Andric    }
0b57cec5SDimitry Andric    SpaceOffset = Text.find_first_of(
0b57cec5SDimitry Andric        Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric  if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
0b57cec5SDimitry Andric    // adaptStartOfLine will break after lines starting with /** if the comment
0b57cec5SDimitry Andric    // is broken anywhere. Avoid emitting this break twice here.
0b57cec5SDimitry Andric    // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
0b57cec5SDimitry Andric    // insert a break after /**, so this code must not insert the same break.
0b57cec5SDimitry Andric    if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
0b57cec5SDimitry Andric      return BreakableToken::Split(StringRef::npos, 0);
0b57cec5SDimitry Andric    StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
0b57cec5SDimitry Andric    StringRef AfterCut = Text.substr(SpaceOffset);
0b57cec5SDimitry Andric    // Don't trim the leading blanks if it would create a */ after the break.
0b57cec5SDimitry Andric    if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/')
0b57cec5SDimitry Andric      AfterCut = AfterCut.ltrim(Blanks);
0b57cec5SDimitry Andric    return BreakableToken::Split(BeforeCut.size(),
0b57cec5SDimitry Andric                                 AfterCut.begin() - BeforeCut.end());
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric  return BreakableToken::Split(StringRef::npos, 0);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricstatic BreakableToken::Split
0b57cec5SDimitry AndricgetStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
0b57cec5SDimitry Andric               unsigned TabWidth, encoding::Encoding Encoding) {
0b57cec5SDimitry Andric  // FIXME: Reduce unit test case.
0b57cec5SDimitry Andric  if (Text.empty())
0b57cec5SDimitry Andric    return BreakableToken::Split(StringRef::npos, 0);
0b57cec5SDimitry Andric  if (ColumnLimit <= UsedColumns)
0b57cec5SDimitry Andric    return BreakableToken::Split(StringRef::npos, 0);
0b57cec5SDimitry Andric  unsigned MaxSplit = ColumnLimit - UsedColumns;
0b57cec5SDimitry Andric  StringRef::size_type SpaceOffset = 0;
0b57cec5SDimitry Andric  StringRef::size_type SlashOffset = 0;
0b57cec5SDimitry Andric  StringRef::size_type WordStartOffset = 0;
0b57cec5SDimitry Andric  StringRef::size_type SplitPoint = 0;
0b57cec5SDimitry Andric  for (unsigned Chars = 0;;) {
0b57cec5SDimitry Andric    unsigned Advance;
0b57cec5SDimitry Andric    if (Text[0] == '\\') {
0b57cec5SDimitry Andric      Advance = encoding::getEscapeSequenceLength(Text);
0b57cec5SDimitry Andric      Chars += Advance;
0b57cec5SDimitry Andric    } else {
0b57cec5SDimitry Andric      Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
0b57cec5SDimitry Andric      Chars += encoding::columnWidthWithTabs(
0b57cec5SDimitry Andric          Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
0b57cec5SDimitry Andric    }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric    if (Chars > MaxSplit || Text.size() <= Advance)
0b57cec5SDimitry Andric      break;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric    if (IsBlank(Text[0]))
0b57cec5SDimitry Andric      SpaceOffset = SplitPoint;
0b57cec5SDimitry Andric    if (Text[0] == '/')
0b57cec5SDimitry Andric      SlashOffset = SplitPoint;
0b57cec5SDimitry Andric    if (Advance == 1 && !isAlphanumeric(Text[0]))
0b57cec5SDimitry Andric      WordStartOffset = SplitPoint;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric    SplitPoint += Advance;
0b57cec5SDimitry Andric    Text = Text.substr(Advance);
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  if (SpaceOffset != 0)
0b57cec5SDimitry Andric    return BreakableToken::Split(SpaceOffset + 1, 0);
0b57cec5SDimitry Andric  if (SlashOffset != 0)
0b57cec5SDimitry Andric    return BreakableToken::Split(SlashOffset + 1, 0);
0b57cec5SDimitry Andric  if (WordStartOffset != 0)
0b57cec5SDimitry Andric    return BreakableToken::Split(WordStartOffset + 1, 0);
0b57cec5SDimitry Andric  if (SplitPoint != 0)
0b57cec5SDimitry Andric    return BreakableToken::Split(SplitPoint, 0);
0b57cec5SDimitry Andric  return BreakableToken::Split(StringRef::npos, 0);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricbool switchesFormatting(const FormatToken &Token) {
0b57cec5SDimitry Andric  assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
0b57cec5SDimitry Andric         "formatting regions are switched by comment tokens");
0b57cec5SDimitry Andric  StringRef Content = Token.TokenText.substr(2).ltrim();
5f757f3fSDimitry Andric  return Content.starts_with("clang-format on") ||
5f757f3fSDimitry Andric         Content.starts_with("clang-format off");
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricunsigned
0b57cec5SDimitry AndricBreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
0b57cec5SDimitry Andric                                          Split Split) const {
0b57cec5SDimitry Andric  // Example: consider the content
0b57cec5SDimitry Andric  // lala  lala
0b57cec5SDimitry Andric  // - RemainingTokenColumns is the original number of columns, 10;
0b57cec5SDimitry Andric  // - Split is (4, 2), denoting the two spaces between the two words;
0b57cec5SDimitry Andric  //
0b57cec5SDimitry Andric  // We compute the number of columns when the split is compressed into a single
0b57cec5SDimitry Andric  // space, like:
0b57cec5SDimitry Andric  // lala lala
0b57cec5SDimitry Andric  //
0b57cec5SDimitry Andric  // FIXME: Correctly measure the length of whitespace in Split.second so it
0b57cec5SDimitry Andric  // works with tabs.
0b57cec5SDimitry Andric  return RemainingTokenColumns + 1 - Split.second;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricunsigned BreakableStringLiteral::getLineCount() const { return 1; }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricunsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
0b57cec5SDimitry Andric                                                unsigned Offset,
0b57cec5SDimitry Andric                                                StringRef::size_type Length,
0b57cec5SDimitry Andric                                                unsigned StartColumn) const {
0b57cec5SDimitry Andric  llvm_unreachable("Getting the length of a part of the string literal "
0b57cec5SDimitry Andric                   "indicates that the code tries to reflow it.");
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricunsigned
0b57cec5SDimitry AndricBreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
0b57cec5SDimitry Andric                                           unsigned StartColumn) const {
0b57cec5SDimitry Andric  return UnbreakableTailLength + Postfix.size() +
1fd87a68SDimitry Andric         encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn,
1fd87a68SDimitry Andric                                       Style.TabWidth, Encoding);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricunsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
0b57cec5SDimitry Andric                                                       bool Break) const {
0b57cec5SDimitry Andric  return StartColumn + Prefix.size();
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry AndricBreakableStringLiteral::BreakableStringLiteral(
0b57cec5SDimitry Andric    const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
0b57cec5SDimitry Andric    StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
0b57cec5SDimitry Andric    encoding::Encoding Encoding, const FormatStyle &Style)
0b57cec5SDimitry Andric    : BreakableToken(Tok, InPPDirective, Encoding, Style),
0b57cec5SDimitry Andric      StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
0b57cec5SDimitry Andric      UnbreakableTailLength(UnbreakableTailLength) {
5f757f3fSDimitry Andric  assert(Tok.TokenText.starts_with(Prefix) && Tok.TokenText.ends_with(Postfix));
0b57cec5SDimitry Andric  Line = Tok.TokenText.substr(
0b57cec5SDimitry Andric      Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry AndricBreakableToken::Split BreakableStringLiteral::getSplit(
0b57cec5SDimitry Andric    unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
480093f4SDimitry Andric    unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
0b57cec5SDimitry Andric  return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
0b57cec5SDimitry Andric                        ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricvoid BreakableStringLiteral::insertBreak(unsigned LineIndex,
0b57cec5SDimitry Andric                                         unsigned TailOffset, Split Split,
0b57cec5SDimitry Andric                                         unsigned ContentIndent,
0b57cec5SDimitry Andric                                         WhitespaceManager &Whitespaces) const {
0b57cec5SDimitry Andric  Whitespaces.replaceWhitespaceInToken(
0b57cec5SDimitry Andric      Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
0b57cec5SDimitry Andric      Prefix, InPPDirective, 1, StartColumn);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
5f757f3fSDimitry AndricBreakableStringLiteralUsingOperators::BreakableStringLiteralUsingOperators(
5f757f3fSDimitry Andric    const FormatToken &Tok, QuoteStyleType QuoteStyle, bool UnindentPlus,
5f757f3fSDimitry Andric    unsigned StartColumn, unsigned UnbreakableTailLength, bool InPPDirective,
5f757f3fSDimitry Andric    encoding::Encoding Encoding, const FormatStyle &Style)
5f757f3fSDimitry Andric    : BreakableStringLiteral(
5f757f3fSDimitry Andric          Tok, StartColumn, /*Prefix=*/QuoteStyle == SingleQuotes ? "'"
5f757f3fSDimitry Andric                            : QuoteStyle == AtDoubleQuotes        ? "@\""
5f757f3fSDimitry Andric                                                                  : "\"",
5f757f3fSDimitry Andric          /*Postfix=*/QuoteStyle == SingleQuotes ? "'" : "\"",
5f757f3fSDimitry Andric          UnbreakableTailLength, InPPDirective, Encoding, Style),
5f757f3fSDimitry Andric      BracesNeeded(Tok.isNot(TT_StringInConcatenation)),
5f757f3fSDimitry Andric      QuoteStyle(QuoteStyle) {
5f757f3fSDimitry Andric  // Find the replacement text for inserting braces and quotes and line breaks.
5f757f3fSDimitry Andric  // We don't create an allocated string concatenated from parts here because it
5f757f3fSDimitry Andric  // has to outlive the BreakableStringliteral object.  The brace replacements
5f757f3fSDimitry Andric  // include a quote so that WhitespaceManager can tell it apart from whitespace
5f757f3fSDimitry Andric  // replacements between the string and surrounding tokens.
5f757f3fSDimitry Andric
5f757f3fSDimitry Andric  // The option is not implemented in JavaScript.
5f757f3fSDimitry Andric  bool SignOnNewLine =
5f757f3fSDimitry Andric      !Style.isJavaScript() &&
5f757f3fSDimitry Andric      Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
5f757f3fSDimitry Andric
5f757f3fSDimitry Andric  if (Style.isVerilog()) {
5f757f3fSDimitry Andric    // In Verilog, all strings are quoted by double quotes, joined by commas,
5f757f3fSDimitry Andric    // and wrapped in braces.  The comma is always before the newline.
5f757f3fSDimitry Andric    assert(QuoteStyle == DoubleQuotes);
5f757f3fSDimitry Andric    LeftBraceQuote = Style.Cpp11BracedListStyle ? "{\"" : "{ \"";
5f757f3fSDimitry Andric    RightBraceQuote = Style.Cpp11BracedListStyle ? "\"}" : "\" }";
5f757f3fSDimitry Andric    Postfix = "\",";
5f757f3fSDimitry Andric    Prefix = "\"";
5f757f3fSDimitry Andric  } else {
5f757f3fSDimitry Andric    // The plus sign may be on either line.  And also C# and JavaScript have
5f757f3fSDimitry Andric    // several quoting styles.
5f757f3fSDimitry Andric    if (QuoteStyle == SingleQuotes) {
5f757f3fSDimitry Andric      LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( '" : "('";
5f757f3fSDimitry Andric      RightBraceQuote = Style.SpacesInParensOptions.Other ? "' )" : "')";
5f757f3fSDimitry Andric      Postfix = SignOnNewLine ? "'" : "' +";
5f757f3fSDimitry Andric      Prefix = SignOnNewLine ? "+ '" : "'";
5f757f3fSDimitry Andric    } else {
5f757f3fSDimitry Andric      if (QuoteStyle == AtDoubleQuotes) {
5f757f3fSDimitry Andric        LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( @" : "(@";
5f757f3fSDimitry Andric        Prefix = SignOnNewLine ? "+ @\"" : "@\"";
5f757f3fSDimitry Andric      } else {
5f757f3fSDimitry Andric        LeftBraceQuote = Style.SpacesInParensOptions.Other ? "( \"" : "(\"";
5f757f3fSDimitry Andric        Prefix = SignOnNewLine ? "+ \"" : "\"";
5f757f3fSDimitry Andric      }
5f757f3fSDimitry Andric      RightBraceQuote = Style.SpacesInParensOptions.Other ? "\" )" : "\")";
5f757f3fSDimitry Andric      Postfix = SignOnNewLine ? "\"" : "\" +";
5f757f3fSDimitry Andric    }
5f757f3fSDimitry Andric  }
5f757f3fSDimitry Andric
5f757f3fSDimitry Andric  // Following lines are indented by the width of the brace and space if any.
5f757f3fSDimitry Andric  ContinuationIndent = BracesNeeded ? LeftBraceQuote.size() - 1 : 0;
5f757f3fSDimitry Andric  // The plus sign may need to be unindented depending on the style.
5f757f3fSDimitry Andric  // FIXME: Add support for DontAlign.
5f757f3fSDimitry Andric  if (!Style.isVerilog() && SignOnNewLine && !BracesNeeded && UnindentPlus &&
5f757f3fSDimitry Andric      Style.AlignOperands == FormatStyle::OAS_AlignAfterOperator) {
5f757f3fSDimitry Andric    ContinuationIndent -= 2;
5f757f3fSDimitry Andric  }
5f757f3fSDimitry Andric}
5f757f3fSDimitry Andric
5f757f3fSDimitry Andricunsigned BreakableStringLiteralUsingOperators::getRemainingLength(
5f757f3fSDimitry Andric    unsigned LineIndex, unsigned Offset, unsigned StartColumn) const {
5f757f3fSDimitry Andric  return UnbreakableTailLength + (BracesNeeded ? RightBraceQuote.size() : 1) +
5f757f3fSDimitry Andric         encoding::columnWidthWithTabs(Line.substr(Offset), StartColumn,
5f757f3fSDimitry Andric                                       Style.TabWidth, Encoding);
5f757f3fSDimitry Andric}
5f757f3fSDimitry Andric
5f757f3fSDimitry Andricunsigned
5f757f3fSDimitry AndricBreakableStringLiteralUsingOperators::getContentStartColumn(unsigned LineIndex,
5f757f3fSDimitry Andric                                                            bool Break) const {
5f757f3fSDimitry Andric  return std::max(
5f757f3fSDimitry Andric      0,
5f757f3fSDimitry Andric      static_cast<int>(StartColumn) +
5f757f3fSDimitry Andric          (Break ? ContinuationIndent + static_cast<int>(Prefix.size())
5f757f3fSDimitry Andric                 : (BracesNeeded ? static_cast<int>(LeftBraceQuote.size()) - 1
5f757f3fSDimitry Andric                                 : 0) +
5f757f3fSDimitry Andric                       (QuoteStyle == AtDoubleQuotes ? 2 : 1)));
5f757f3fSDimitry Andric}
5f757f3fSDimitry Andric
5f757f3fSDimitry Andricvoid BreakableStringLiteralUsingOperators::insertBreak(
5f757f3fSDimitry Andric    unsigned LineIndex, unsigned TailOffset, Split Split,
5f757f3fSDimitry Andric    unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
5f757f3fSDimitry Andric  Whitespaces.replaceWhitespaceInToken(
5f757f3fSDimitry Andric      Tok, /*Offset=*/(QuoteStyle == AtDoubleQuotes ? 2 : 1) + TailOffset +
5f757f3fSDimitry Andric               Split.first,
5f757f3fSDimitry Andric      /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/Postfix,
5f757f3fSDimitry Andric      /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1,
5f757f3fSDimitry Andric      /*Spaces=*/
5f757f3fSDimitry Andric      std::max(0, static_cast<int>(StartColumn) + ContinuationIndent));
5f757f3fSDimitry Andric}
5f757f3fSDimitry Andric
5f757f3fSDimitry Andricvoid BreakableStringLiteralUsingOperators::updateAfterBroken(
5f757f3fSDimitry Andric    WhitespaceManager &Whitespaces) const {
5f757f3fSDimitry Andric  // Add the braces required for breaking the token if they are needed.
5f757f3fSDimitry Andric  if (!BracesNeeded)
5f757f3fSDimitry Andric    return;
5f757f3fSDimitry Andric
5f757f3fSDimitry Andric  // To add a brace or parenthesis, we replace the quote (or the at sign) with a
5f757f3fSDimitry Andric  // brace and another quote.  This is because the rest of the program requires
5f757f3fSDimitry Andric  // one replacement for each source range.  If we replace the empty strings
5f757f3fSDimitry Andric  // around the string, it may conflict with whitespace replacements between the
5f757f3fSDimitry Andric  // string and adjacent tokens.
5f757f3fSDimitry Andric  Whitespaces.replaceWhitespaceInToken(
5f757f3fSDimitry Andric      Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"",
5f757f3fSDimitry Andric      /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0,
5f757f3fSDimitry Andric      /*Spaces=*/0);
5f757f3fSDimitry Andric  Whitespaces.replaceWhitespaceInToken(
5f757f3fSDimitry Andric      Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1,
5f757f3fSDimitry Andric      /*PreviousPostfix=*/RightBraceQuote,
5f757f3fSDimitry Andric      /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0);
5f757f3fSDimitry Andric}
5f757f3fSDimitry Andric
0b57cec5SDimitry AndricBreakableComment::BreakableComment(const FormatToken &Token,
0b57cec5SDimitry Andric                                   unsigned StartColumn, bool InPPDirective,
0b57cec5SDimitry Andric                                   encoding::Encoding Encoding,
0b57cec5SDimitry Andric                                   const FormatStyle &Style)
0b57cec5SDimitry Andric    : BreakableToken(Token, InPPDirective, Encoding, Style),
0b57cec5SDimitry Andric      StartColumn(StartColumn) {}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricunsigned BreakableComment::getLineCount() const { return Lines.size(); }
0b57cec5SDimitry Andric
0b57cec5SDimitry AndricBreakableToken::Split
0b57cec5SDimitry AndricBreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
0b57cec5SDimitry Andric                           unsigned ColumnLimit, unsigned ContentStartColumn,
480093f4SDimitry Andric                           const llvm::Regex &CommentPragmasRegex) const {
0b57cec5SDimitry Andric  // Don't break lines matching the comment pragmas regex.
0b57cec5SDimitry Andric  if (CommentPragmasRegex.match(Content[LineIndex]))
0b57cec5SDimitry Andric    return Split(StringRef::npos, 0);
0b57cec5SDimitry Andric  return getCommentSplit(Content[LineIndex].substr(TailOffset),
0b57cec5SDimitry Andric                         ContentStartColumn, ColumnLimit, Style.TabWidth,
0b57cec5SDimitry Andric                         Encoding, Style);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricvoid BreakableComment::compressWhitespace(
0b57cec5SDimitry Andric    unsigned LineIndex, unsigned TailOffset, Split Split,
0b57cec5SDimitry Andric    WhitespaceManager &Whitespaces) const {
0b57cec5SDimitry Andric  StringRef Text = Content[LineIndex].substr(TailOffset);
0b57cec5SDimitry Andric  // Text is relative to the content line, but Whitespaces operates relative to
0b57cec5SDimitry Andric  // the start of the corresponding token, so compute the start of the Split
0b57cec5SDimitry Andric  // that needs to be compressed into a single space relative to the start of
0b57cec5SDimitry Andric  // its token.
0b57cec5SDimitry Andric  unsigned BreakOffsetInToken =
0b57cec5SDimitry Andric      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
0b57cec5SDimitry Andric  unsigned CharsToRemove = Split.second;
0b57cec5SDimitry Andric  Whitespaces.replaceWhitespaceInToken(
0b57cec5SDimitry Andric      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
0b57cec5SDimitry Andric      /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricconst FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
0b57cec5SDimitry Andric  return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricstatic bool mayReflowContent(StringRef Content) {
0b57cec5SDimitry Andric  Content = Content.trim(Blanks);
*0fca6ea1SDimitry Andric  // Lines starting with '@' or '\' commonly have special meaning.
0b57cec5SDimitry Andric  // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
0b57cec5SDimitry Andric  bool hasSpecialMeaningPrefix = false;
0b57cec5SDimitry Andric  for (StringRef Prefix :
*0fca6ea1SDimitry Andric       {"@", "\\", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
5f757f3fSDimitry Andric    if (Content.starts_with(Prefix)) {
0b57cec5SDimitry Andric      hasSpecialMeaningPrefix = true;
0b57cec5SDimitry Andric      break;
0b57cec5SDimitry Andric    }
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Numbered lists may also start with a number followed by '.'
0b57cec5SDimitry Andric  // To avoid issues if a line starts with a number which is actually the end
0b57cec5SDimitry Andric  // of a previous line, we only consider numbers with up to 2 digits.
480093f4SDimitry Andric  static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. ");
0b57cec5SDimitry Andric  hasSpecialMeaningPrefix =
480093f4SDimitry Andric      hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Simple heuristic for what to reflow: content should contain at least two
0b57cec5SDimitry Andric  // characters and either the first or second character must be
0b57cec5SDimitry Andric  // non-punctuation.
0b57cec5SDimitry Andric  return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
5f757f3fSDimitry Andric         !Content.ends_with("\\") &&
0b57cec5SDimitry Andric         // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
0b57cec5SDimitry Andric         // true, then the first code point must be 1 byte long.
0b57cec5SDimitry Andric         (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Builds the line-by-line model of a block comment token.
//
// The token text, minus the two delimiter characters at each end, is split
// into Lines. The constructor then fills in Content and ContentColumn for every
// line, detects the decoration shared by the lines after the first (at most a
// star followed by a space), computes IndentAtLineBreak, and decides whether
// the delimiters go on their own lines (DelimitersOnNewline; only evaluated
// for Java and JavaScript).
//
// Token               - the block comment token whose TokenText is split.
// StartColumn         - column of the first character of the comment;
//                       ContentColumn[0] is StartColumn + 2.
// OriginalStartColumn - only used to compute
//                       IndentDelta = StartColumn - OriginalStartColumn, which
//                       adjustWhitespace adds to the column of later lines.
// FirstInLine         - when false and the comment has a single line,
//                       Decoration is left empty.
// UseCRLF             - split lines on "\r\n" instead of "\n".
// InPPDirective, Encoding and Style are passed on to BreakableComment.
0b57cec5SDimitry AndricBreakableBlockComment::BreakableBlockComment(
0b57cec5SDimitry Andric    const FormatToken &Token, unsigned StartColumn,
0b57cec5SDimitry Andric    unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
0b57cec5SDimitry Andric    encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF)
0b57cec5SDimitry Andric    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
0b57cec5SDimitry Andric      DelimitersOnNewline(false),
0b57cec5SDimitry Andric      UnbreakableTailLength(Token.UnbreakableTailLength) {
0b57cec5SDimitry Andric  assert(Tok.is(TT_BlockComment) &&
0b57cec5SDimitry Andric         "block comment section must start with a block comment");
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  StringRef TokenText(Tok.TokenText);
5f757f3fSDimitry Andric  assert(TokenText.starts_with("/*") && TokenText.ends_with("*/"));
  // Drop the two delimiter characters at each end, then split into lines.
a7dea167SDimitry Andric  TokenText.substr(2, TokenText.size() - 4)
a7dea167SDimitry Andric      .split(Lines, UseCRLF ? "\r\n" : "\n");
0b57cec5SDimitry Andric
  // Horizontal shift of the comment; adjustWhitespace adds it to the column of
  // every line after the first.
0b57cec5SDimitry Andric  int IndentDelta = StartColumn - OriginalStartColumn;
0b57cec5SDimitry Andric  Content.resize(Lines.size());
0b57cec5SDimitry Andric  Content[0] = Lines[0];
0b57cec5SDimitry Andric  ContentColumn.resize(Lines.size());
0b57cec5SDimitry Andric  // Account for the initial '/*'.
0b57cec5SDimitry Andric  ContentColumn[0] = StartColumn + 2;
0b57cec5SDimitry Andric  Tokens.resize(Lines.size());
  // Trim blanks around each line break and compute the column of each later
  // line.
0b57cec5SDimitry Andric  for (size_t i = 1; i < Lines.size(); ++i)
0b57cec5SDimitry Andric    adjustWhitespace(i, IndentDelta);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Align decorations with the column of the star on the first line,
0b57cec5SDimitry Andric  // that is one column after the start "/*".
0b57cec5SDimitry Andric  DecorationColumn = StartColumn + 1;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Account for comment decoration patterns like this:
0b57cec5SDimitry Andric  //
0b57cec5SDimitry Andric  // /*
0b57cec5SDimitry Andric  // ** blah blah blah
0b57cec5SDimitry Andric  // */
5f757f3fSDimitry Andric  if (Lines.size() >= 2 && Content[1].starts_with("**") &&
0b57cec5SDimitry Andric      static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
0b57cec5SDimitry Andric    DecorationColumn = StartColumn;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  Decoration = "* ";
0b57cec5SDimitry Andric  if (Lines.size() == 1 && !FirstInLine) {
0b57cec5SDimitry Andric    // Comments for which FirstInLine is false can start on arbitrary column,
0b57cec5SDimitry Andric    // and available horizontal space can be too small to align consecutive
0b57cec5SDimitry Andric    // lines with the first one.
0b57cec5SDimitry Andric    // FIXME: We could, probably, align them to current indentation level, but
0b57cec5SDimitry Andric    // now we just wrap them without stars.
0b57cec5SDimitry Andric    Decoration = "";
0b57cec5SDimitry Andric  }
  // Shrink Decoration to a prefix shared by the lines after the first. A line
  // other than the last that is non-empty and is itself a prefix of the current
  // decoration is accepted unchanged; any other line shortens the decoration
  // until the line starts with it. The loop stops once Decoration is empty.
81ad6265SDimitry Andric  for (size_t i = 1, e = Content.size(); i < e && !Decoration.empty(); ++i) {
81ad6265SDimitry Andric    const StringRef &Text = Content[i];
81ad6265SDimitry Andric    if (i + 1 == e) {
0b57cec5SDimitry Andric      // If the last line is empty, the closing "*/" will have a star.
81ad6265SDimitry Andric      if (Text.empty())
0b57cec5SDimitry Andric        break;
5f757f3fSDimitry Andric    } else if (!Text.empty() && Decoration.starts_with(Text)) {
0b57cec5SDimitry Andric      continue;
81ad6265SDimitry Andric    }
5f757f3fSDimitry Andric    while (!Text.starts_with(Decoration))
81ad6265SDimitry Andric      Decoration = Decoration.drop_back(1);
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  LastLineNeedsDecoration = true;
  // Start from one column past the content of the first line; the loop below
  // may lower this value.
0b57cec5SDimitry Andric  IndentAtLineBreak = ContentColumn[0] + 1;
0b57cec5SDimitry Andric  for (size_t i = 1, e = Lines.size(); i < e; ++i) {
0b57cec5SDimitry Andric    if (Content[i].empty()) {
0b57cec5SDimitry Andric      if (i + 1 == e) {
0b57cec5SDimitry Andric        // Empty last line means that we already have a star as a part of the
0b57cec5SDimitry Andric        // trailing */. We also need to preserve whitespace, so that */ is
0b57cec5SDimitry Andric        // correctly indented.
0b57cec5SDimitry Andric        LastLineNeedsDecoration = false;
0b57cec5SDimitry Andric        // Align the star in the last '*/' with the stars on the previous lines.
81ad6265SDimitry Andric        if (e >= 2 && !Decoration.empty())
0b57cec5SDimitry Andric          ContentColumn[i] = DecorationColumn;
0b57cec5SDimitry Andric      } else if (Decoration.empty()) {
0b57cec5SDimitry Andric        // For all other lines, set the start column to 0 if they're empty, so
0b57cec5SDimitry Andric        // we do not insert trailing whitespace anywhere.
0b57cec5SDimitry Andric        ContentColumn[i] = 0;
0b57cec5SDimitry Andric      }
0b57cec5SDimitry Andric      continue;
0b57cec5SDimitry Andric    }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric    // The first line already excludes the star.
0b57cec5SDimitry Andric    // The last line excludes the star if LastLineNeedsDecoration is false.
0b57cec5SDimitry Andric    // For all other lines, adjust the line to exclude the star and
0b57cec5SDimitry Andric    // (optionally) the first whitespace.
    // If the whole line is a prefix of the decoration, the entire line counts
    // as decoration.
5f757f3fSDimitry Andric    unsigned DecorationSize = Decoration.starts_with(Content[i])
0b57cec5SDimitry Andric                                  ? Content[i].size()
0b57cec5SDimitry Andric                                  : Decoration.size();
81ad6265SDimitry Andric    if (DecorationSize)
0b57cec5SDimitry Andric      ContentColumn[i] = DecorationColumn + DecorationSize;
0b57cec5SDimitry Andric    Content[i] = Content[i].substr(DecorationSize);
    // Lines whose remaining content is a prefix of the decoration (including
    // lines left empty) do not influence the break indent.
5f757f3fSDimitry Andric    if (!Decoration.starts_with(Content[i])) {
0b57cec5SDimitry Andric      IndentAtLineBreak =
0b57cec5SDimitry Andric          std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
0b57cec5SDimitry Andric    }
81ad6265SDimitry Andric  }
  // The indent at a break is never smaller than the size of the decoration.
0b57cec5SDimitry Andric  IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
0eae32dcSDimitry Andric  if (Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) {
5f757f3fSDimitry Andric    if ((Lines[0] == "*" || Lines[0].starts_with("* ")) && Lines.size() > 1) {
0b57cec5SDimitry Andric      // This is a multiline jsdoc comment.
0b57cec5SDimitry Andric      DelimitersOnNewline = true;
5f757f3fSDimitry Andric    } else if (Lines[0].starts_with("* ") && Lines.size() == 1) {
0b57cec5SDimitry Andric      // Detect a long single-line comment, like:
0b57cec5SDimitry Andric      // /** long long long */
0b57cec5SDimitry Andric      // Below, '2' is the width of '*/'.
0b57cec5SDimitry Andric      unsigned EndColumn =
0b57cec5SDimitry Andric          ContentColumn[0] +
0b57cec5SDimitry Andric          encoding::columnWidthWithTabs(Lines[0], ContentColumn[0],
0b57cec5SDimitry Andric                                        Style.TabWidth, Encoding) +
0b57cec5SDimitry Andric          2;
0b57cec5SDimitry Andric      DelimitersOnNewline = EndColumn > Style.ColumnLimit;
0b57cec5SDimitry Andric    }
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric
  // Dump the computed per-line state: content, column, and the offset of the
  // content within its line.
0b57cec5SDimitry Andric  LLVM_DEBUG({
0b57cec5SDimitry Andric    llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
0b57cec5SDimitry Andric    llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
0b57cec5SDimitry Andric    for (size_t i = 0; i < Lines.size(); ++i) {
0b57cec5SDimitry Andric      llvm::dbgs() << i << " |" << Content[i] << "| "
0b57cec5SDimitry Andric                   << "CC=" << ContentColumn[i] << "| "
0b57cec5SDimitry Andric                   << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
0b57cec5SDimitry Andric    }
0b57cec5SDimitry Andric  });
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Finds a position at which the rest of line LineIndex, starting at
// TailOffset, can be broken.
//
// A line whose content matches CommentPragmasRegex is never broken; in that
// case Split(StringRef::npos, 0) is returned. The regex is matched against the
// whole line content, not only the part after TailOffset. Otherwise the
// decision is delegated to getCommentSplit, which is also told whether
// Decoration ends with a star.
a7dea167SDimitry AndricBreakableToken::Split BreakableBlockComment::getSplit(
a7dea167SDimitry Andric    unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
480093f4SDimitry Andric    unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const {
0b57cec5SDimitry Andric  // Don't break lines matching the comment pragmas regex.
0b57cec5SDimitry Andric  if (CommentPragmasRegex.match(Content[LineIndex]))
0b57cec5SDimitry Andric    return Split(StringRef::npos, 0);
0b57cec5SDimitry Andric  return getCommentSplit(Content[LineIndex].substr(TailOffset),
0b57cec5SDimitry Andric                         ContentStartColumn, ColumnLimit, Style.TabWidth,
5f757f3fSDimitry Andric                         Encoding, Style, Decoration.ends_with("*"));
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Strips the whitespace around the line break between lines LineIndex - 1 and
// LineIndex, and records the column of line LineIndex.
//
// Afterwards Content[LineIndex - 1] ends after the last non-blank character of
// the previous line (a trailing backslash is dropped too when InPPDirective is
// set), Content[LineIndex] starts at the first non-blank character of the
// current line, and ContentColumn[LineIndex] is the width of the removed
// leading whitespace plus IndentDelta. The column is not clamped here.
// LineIndex must be at least 1, since the previous line is always accessed.
0b57cec5SDimitry Andricvoid BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
0b57cec5SDimitry Andric                                             int IndentDelta) {
0b57cec5SDimitry Andric  // When in a preprocessor directive, the trailing backslash in a block comment
0b57cec5SDimitry Andric  // is not needed, but can serve a purpose of uniformity with necessary escaped
0b57cec5SDimitry Andric  // newlines outside the comment. In this case we remove it here before
0b57cec5SDimitry Andric  // trimming the trailing whitespace. The backslash will be re-added later when
0b57cec5SDimitry Andric  // inserting a line break.
0b57cec5SDimitry Andric  size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
5f757f3fSDimitry Andric  if (InPPDirective && Lines[LineIndex - 1].ends_with("\\"))
0b57cec5SDimitry Andric    --EndOfPreviousLine;
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Calculate the end of the non-whitespace text in the previous line.
0b57cec5SDimitry Andric  EndOfPreviousLine =
0b57cec5SDimitry Andric      Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
  // npos means the previous line holds nothing but blanks; otherwise convert
  // the index of the last non-blank character into an end offset.
0b57cec5SDimitry Andric  if (EndOfPreviousLine == StringRef::npos)
0b57cec5SDimitry Andric    EndOfPreviousLine = 0;
0b57cec5SDimitry Andric  else
0b57cec5SDimitry Andric    ++EndOfPreviousLine;
0b57cec5SDimitry Andric  // Calculate the start of the non-whitespace text in the current line.
0b57cec5SDimitry Andric  size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
0b57cec5SDimitry Andric  if (StartOfLine == StringRef::npos)
0b57cec5SDimitry Andric    StartOfLine = Lines[LineIndex].size();
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
0b57cec5SDimitry Andric  // Adjust Lines to only contain relevant text.
  // The previous content keeps whatever start offset it already had within its
  // line; only its end is moved.
0b57cec5SDimitry Andric  size_t PreviousContentOffset =
0b57cec5SDimitry Andric      Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
0b57cec5SDimitry Andric  Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
0b57cec5SDimitry Andric      PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
0b57cec5SDimitry Andric  Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // Adjust the start column uniformly across all lines.
0b57cec5SDimitry Andric  ContentColumn[LineIndex] =
0b57cec5SDimitry Andric      encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
0b57cec5SDimitry Andric      IndentDelta;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Returns the width, as computed by encoding::columnWidthWithTabs, of the
// piece of Content[LineIndex] that starts at Offset and spans Length
// characters, measured as if it began at StartColumn and using Style.TabWidth
// and Encoding.
0b57cec5SDimitry Andricunsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
0b57cec5SDimitry Andric                                               unsigned Offset,
0b57cec5SDimitry Andric                                               StringRef::size_type Length,
0b57cec5SDimitry Andric                                               unsigned StartColumn) const {
1fd87a68SDimitry Andric  return encoding::columnWidthWithTabs(
1fd87a68SDimitry Andric      Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
1fd87a68SDimitry Andric      Encoding);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Returns the width needed for the rest of line LineIndex from Offset onwards.
//
// The result is the range length of the remaining content plus
// UnbreakableTailLength. For the last line, 2 more columns are added; if in
// addition no content remains at Offset and the raw line, with leading
// whitespace removed, starts with Decoration, the size of the decoration is
// subtracted again.
0b57cec5SDimitry Andricunsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
0b57cec5SDimitry Andric                                                   unsigned Offset,
0b57cec5SDimitry Andric                                                   unsigned StartColumn) const {
1fd87a68SDimitry Andric  unsigned LineLength =
1fd87a68SDimitry Andric      UnbreakableTailLength +
0b57cec5SDimitry Andric      getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
1fd87a68SDimitry Andric  if (LineIndex + 1 == Lines.size()) {
    // NOTE(review): the 2 presumably stands for the closing delimiter - confirm.
1fd87a68SDimitry Andric    LineLength += 2;
1fd87a68SDimitry Andric    // We never need a decoration when breaking just the trailing "*/" postfix.
1fd87a68SDimitry Andric    bool HasRemainingText = Offset < Content[LineIndex].size();
1fd87a68SDimitry Andric    if (!HasRemainingText) {
5f757f3fSDimitry Andric      bool HasDecoration = Lines[LineIndex].ltrim().starts_with(Decoration);
1fd87a68SDimitry Andric      if (HasDecoration)
1fd87a68SDimitry Andric        LineLength -= Decoration.size();
1fd87a68SDimitry Andric    }
1fd87a68SDimitry Andric  }
1fd87a68SDimitry Andric  return LineLength;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Returns the column at which content starts on line LineIndex: the shared
// IndentAtLineBreak when Break is true, otherwise the recorded column of the
// line, with negative values treated as 0.
0b57cec5SDimitry Andricunsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
0b57cec5SDimitry Andric                                                      bool Break) const {
0b57cec5SDimitry Andric  if (Break)
0b57cec5SDimitry Andric    return IndentAtLineBreak;
0b57cec5SDimitry Andric  return std::max(0, ContentColumn[LineIndex]);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Annotation words looked up by getContentIndent: when the first word of a
// line is in this set, getContentIndent returns Style.ContinuationIndentWidth
// for that line.
0b57cec5SDimitry Andricconst llvm::StringSet<>
0b57cec5SDimitry Andric    BreakableBlockComment::ContentIndentingJavadocAnnotations = {
0b57cec5SDimitry Andric        "@param", "@return",     "@returns", "@throws",  "@type", "@template",
0b57cec5SDimitry Andric        "@see",   "@deprecated", "@define",  "@exports", "@mods", "@private",
0b57cec5SDimitry Andric};
0b57cec5SDimitry Andric
// Returns the extra indent associated with line LineIndex.
//
// Always 0 unless the language is Java or JavaScript. For those languages the
// result is Style.ContinuationIndentWidth when the first word of the line (the
// text up to the first blank) is one of ContentIndentingJavadocAnnotations, and
// 0 otherwise.
0b57cec5SDimitry Andricunsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
0eae32dcSDimitry Andric  if (Style.Language != FormatStyle::LK_Java && !Style.isJavaScript())
0b57cec5SDimitry Andric    return 0;
0b57cec5SDimitry Andric  // The content at LineIndex 0 of a comment like:
0b57cec5SDimitry Andric  // /** line 0 */
0b57cec5SDimitry Andric  // is "* line 0", so we need to skip over the decoration in that case.
0b57cec5SDimitry Andric  StringRef ContentWithNoDecoration = Content[LineIndex];
5f757f3fSDimitry Andric  if (LineIndex == 0 && ContentWithNoDecoration.starts_with("*"))
0b57cec5SDimitry Andric    ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
  // The first word is everything up to the first blank character.
0b57cec5SDimitry Andric  StringRef FirstWord = ContentWithNoDecoration.substr(
0b57cec5SDimitry Andric      0, ContentWithNoDecoration.find_first_of(Blanks));
06c3fb27SDimitry Andric  if (ContentIndentingJavadocAnnotations.contains(FirstWord))
0b57cec5SDimitry Andric    return Style.ContinuationIndentWidth;
0b57cec5SDimitry Andric  return 0;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Registers a line break inside line LineIndex with Whitespaces.
//
// Split is relative to the text of the line starting at TailOffset:
// Split.second characters starting at Split.first are replaced, with one
// newline requested. The prefix for the new line is Decoration followed by
// ContentIndent spaces. When the split reaches exactly the end of the last
// line, no decoration is used and the indent is reduced by two columns if it is
// at least two.
0b57cec5SDimitry Andricvoid BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
0b57cec5SDimitry Andric                                        Split Split, unsigned ContentIndent,
0b57cec5SDimitry Andric                                        WhitespaceManager &Whitespaces) const {
0b57cec5SDimitry Andric  StringRef Text = Content[LineIndex].substr(TailOffset);
0b57cec5SDimitry Andric  StringRef Prefix = Decoration;
0b57cec5SDimitry Andric  // We need this to account for the case when we have a decoration "* " for all
0b57cec5SDimitry Andric  // the lines except for the last one, where the star in "*/" acts as a
0b57cec5SDimitry Andric  // decoration.
0b57cec5SDimitry Andric  unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
0b57cec5SDimitry Andric  if (LineIndex + 1 == Lines.size() &&
0b57cec5SDimitry Andric      Text.size() == Split.first + Split.second) {
0b57cec5SDimitry Andric    // For the last line we need to break before "*/", but not to add "* ".
0b57cec5SDimitry Andric    Prefix = "";
0b57cec5SDimitry Andric    if (LocalIndentAtLineBreak >= 2)
0b57cec5SDimitry Andric      LocalIndentAtLineBreak -= 2;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric  // The split offset is from the beginning of the line. Convert it to an offset
0b57cec5SDimitry Andric  // from the beginning of the token text.
0b57cec5SDimitry Andric  unsigned BreakOffsetInToken =
0b57cec5SDimitry Andric      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
0b57cec5SDimitry Andric  unsigned CharsToRemove = Split.second;
0b57cec5SDimitry Andric  assert(LocalIndentAtLineBreak >= Prefix.size());
0b57cec5SDimitry Andric  std::string PrefixWithTrailingIndent = std::string(Prefix);
5ffd83dbSDimitry Andric  PrefixWithTrailingIndent.append(ContentIndent, ' ');
  // The prefix already contains the ContentIndent spaces, so the Spaces value
  // below equals LocalIndentAtLineBreak - Prefix.size().
0b57cec5SDimitry Andric  Whitespaces.replaceWhitespaceInToken(
0b57cec5SDimitry Andric      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
0b57cec5SDimitry Andric      PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
0b57cec5SDimitry Andric      /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
0b57cec5SDimitry Andric          PrefixWithTrailingIndent.size());
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Returns the split used to join line LineIndex onto the previous line.
//
// Split(StringRef::npos, 0) is returned when mayReflow rejects the line, or
// when the previous line has a content indent and this line has leading blanks
// of a different length. Otherwise the result is Split(0, N), where N is the
// number of leading blanks in the content of the line (0 when the content is
// empty or consists only of blanks).
480093f4SDimitry AndricBreakableToken::Split BreakableBlockComment::getReflowSplit(
480093f4SDimitry Andric    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
0b57cec5SDimitry Andric  if (!mayReflow(LineIndex, CommentPragmasRegex))
0b57cec5SDimitry Andric    return Split(StringRef::npos, 0);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // If we're reflowing into a line with content indent, only reflow the next
0b57cec5SDimitry Andric  // line if its starting whitespace matches the content indent.
0b57cec5SDimitry Andric  size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
0b57cec5SDimitry Andric  if (LineIndex) {
0b57cec5SDimitry Andric    unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
0b57cec5SDimitry Andric    if (PreviousContentIndent && Trimmed != StringRef::npos &&
81ad6265SDimitry Andric        Trimmed != PreviousContentIndent) {
0b57cec5SDimitry Andric      return Split(StringRef::npos, 0);
0b57cec5SDimitry Andric    }
81ad6265SDimitry Andric  }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Returns true when DelimitersOnNewline is set and the first line contains a
// non-blank character after its first character.
0b57cec5SDimitry Andricbool BreakableBlockComment::introducesBreakBeforeToken() const {
0b57cec5SDimitry Andric  // A break is introduced when we want delimiters on newline.
0b57cec5SDimitry Andric  return DelimitersOnNewline &&
0b57cec5SDimitry Andric         Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Joins line LineIndex onto the end of the previous line.
//
// Everything between the end of the content of the previous line and the first
// non-blank character of the content of this line is replaced by ReflowPrefix,
// with no newline and no additional spaces. LineIndex must be at least 1, and
// both lines must belong to the same token (checked by the assert).
0b57cec5SDimitry Andricvoid BreakableBlockComment::reflow(unsigned LineIndex,
0b57cec5SDimitry Andric                                   WhitespaceManager &Whitespaces) const {
0b57cec5SDimitry Andric  StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
0b57cec5SDimitry Andric  // Here we need to reflow.
0b57cec5SDimitry Andric  assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
0b57cec5SDimitry Andric         "Reflowing whitespace within a token");
0b57cec5SDimitry Andric  // This is the offset of the end of the last line relative to the start of
0b57cec5SDimitry Andric  // the token text in the token.
0b57cec5SDimitry Andric  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
0b57cec5SDimitry Andric                                     Content[LineIndex - 1].size() -
0b57cec5SDimitry Andric                                     tokenAt(LineIndex).TokenText.data();
  // Number of characters from that offset up to the first non-blank character
  // of this line.
0b57cec5SDimitry Andric  unsigned WhitespaceLength = TrimmedContent.data() -
0b57cec5SDimitry Andric                              tokenAt(LineIndex).TokenText.data() -
0b57cec5SDimitry Andric                              WhitespaceOffsetInToken;
0b57cec5SDimitry Andric  Whitespaces.replaceWhitespaceInToken(
0b57cec5SDimitry Andric      tokenAt(LineIndex), WhitespaceOffsetInToken,
0b57cec5SDimitry Andric      /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
0b57cec5SDimitry Andric      /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
0b57cec5SDimitry Andric      /*Spaces=*/0);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Rewrites the start of line LineIndex for the case where the line stays on
// its own line instead of being reflown into the previous one.
//
// Line 0: nothing is changed unless DelimitersOnNewline is set and the line has
// a non-blank character after its first character; then a break is inserted at
// offset 1, which also removes the blanks that follow.
// Other lines: the characters between the end of the previous content and the
// start of this content are replaced, with one newline requested, a prefix
// chosen below (the decoration, only its first character, or nothing), and
// ContentColumn[LineIndex] - Prefix.size() spaces.
0b57cec5SDimitry Andricvoid BreakableBlockComment::adaptStartOfLine(
0b57cec5SDimitry Andric    unsigned LineIndex, WhitespaceManager &Whitespaces) const {
0b57cec5SDimitry Andric  if (LineIndex == 0) {
0b57cec5SDimitry Andric    if (DelimitersOnNewline) {
0b57cec5SDimitry Andric      // Since we're breaking at index 1 below, the break position and the
0b57cec5SDimitry Andric      // break length are the same.
0b57cec5SDimitry Andric      // Note: this works because getCommentSplit is careful never to split at
0b57cec5SDimitry Andric      // the beginning of a line.
0b57cec5SDimitry Andric      size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
81ad6265SDimitry Andric      if (BreakLength != StringRef::npos) {
0b57cec5SDimitry Andric        insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
0b57cec5SDimitry Andric                    Whitespaces);
0b57cec5SDimitry Andric      }
81ad6265SDimitry Andric    }
0b57cec5SDimitry Andric    return;
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric  // Here no reflow with the previous line will happen.
0b57cec5SDimitry Andric  // Fix the decoration of the line at LineIndex.
0b57cec5SDimitry Andric  StringRef Prefix = Decoration;
0b57cec5SDimitry Andric  if (Content[LineIndex].empty()) {
0b57cec5SDimitry Andric    if (LineIndex + 1 == Lines.size()) {
0b57cec5SDimitry Andric      if (!LastLineNeedsDecoration) {
0b57cec5SDimitry Andric        // If the last line was empty, we don't need a prefix, as the */ will
0b57cec5SDimitry Andric        // line up with the decoration (if it exists).
0b57cec5SDimitry Andric        Prefix = "";
0b57cec5SDimitry Andric      }
0b57cec5SDimitry Andric    } else if (!Decoration.empty()) {
0b57cec5SDimitry Andric      // For other empty lines, if we do have a decoration, adapt it to not
0b57cec5SDimitry Andric      // contain a trailing whitespace.
0b57cec5SDimitry Andric      Prefix = Prefix.substr(0, 1);
0b57cec5SDimitry Andric    }
81ad6265SDimitry Andric  } else if (ContentColumn[LineIndex] == 1) {
0b57cec5SDimitry Andric    // This line starts immediately after the decorating *.
0b57cec5SDimitry Andric    Prefix = Prefix.substr(0, 1);
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric  // This is the offset of the end of the last line relative to the start of the
0b57cec5SDimitry Andric  // token text in the token.
0b57cec5SDimitry Andric  unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
0b57cec5SDimitry Andric                                     Content[LineIndex - 1].size() -
0b57cec5SDimitry Andric                                     tokenAt(LineIndex).TokenText.data();
  // Number of characters from that offset up to the start of this content.
0b57cec5SDimitry Andric  unsigned WhitespaceLength = Content[LineIndex].data() -
0b57cec5SDimitry Andric                              tokenAt(LineIndex).TokenText.data() -
0b57cec5SDimitry Andric                              WhitespaceOffsetInToken;
0b57cec5SDimitry Andric  Whitespaces.replaceWhitespaceInToken(
0b57cec5SDimitry Andric      tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
0b57cec5SDimitry Andric      InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Returns the split to apply after the text of the last line.
//
// Only when DelimitersOnNewline is set and the last line, from TailOffset on,
// still has non-blank text: the split then starts right after that text and
// covers the trailing blanks. In every other case Split(StringRef::npos, 0) is
// returned.
0b57cec5SDimitry AndricBreakableToken::Split
0b57cec5SDimitry AndricBreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
0b57cec5SDimitry Andric  if (DelimitersOnNewline) {
0b57cec5SDimitry Andric    // Replace the trailing whitespace of the last line with a newline.
0b57cec5SDimitry Andric    // In case the last line is empty, the ending '*/' is already on its own
0b57cec5SDimitry Andric    // line.
0b57cec5SDimitry Andric    StringRef Line = Content.back().substr(TailOffset);
0b57cec5SDimitry Andric    StringRef TrimmedLine = Line.rtrim(Blanks);
0b57cec5SDimitry Andric    if (!TrimmedLine.empty())
0b57cec5SDimitry Andric      return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric  return Split(StringRef::npos, 0);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
// Returns whether line LineIndex may be joined onto the previous line.
//
// All of the following must hold: LineIndex is greater than 0, the line does
// not match CommentPragmasRegex, mayReflowContent accepts the content of the
// line, the token is not Finalized, and switchesFormatting is false for the
// token of the line. The pragma regex is matched against the text after the
// leading star of the raw line when there is one, otherwise against the
// content of the line.
480093f4SDimitry Andricbool BreakableBlockComment::mayReflow(
480093f4SDimitry Andric    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
0b57cec5SDimitry Andric  // Content[LineIndex] may exclude the indent after the '*' decoration. In that
0b57cec5SDimitry Andric  // case, we compute the start of the comment pragma manually.
0b57cec5SDimitry Andric  StringRef IndentContent = Content[LineIndex];
5f757f3fSDimitry Andric  if (Lines[LineIndex].ltrim(Blanks).starts_with("*"))
0b57cec5SDimitry Andric    IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
0b57cec5SDimitry Andric  return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
0b57cec5SDimitry Andric         mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
0b57cec5SDimitry Andric         !switchesFormatting(tokenAt(LineIndex));
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry AndricBreakableLineCommentSection::BreakableLineCommentSection(
e8d8bef9SDimitry Andric    const FormatToken &Token, unsigned StartColumn, bool InPPDirective,
0b57cec5SDimitry Andric    encoding::Encoding Encoding, const FormatStyle &Style)
0b57cec5SDimitry Andric    : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
0b57cec5SDimitry Andric  assert(Tok.is(TT_LineComment) &&
0b57cec5SDimitry Andric         "line comment section must start with a line comment");
0b57cec5SDimitry Andric  FormatToken *LineTok = nullptr;
56f451bbSDimitry Andric  const int Minimum = Style.SpacesInLineCommentPrefix.Minimum;
fe6060f1SDimitry Andric  // How many spaces we changed in the first line of the section, this will be
fe6060f1SDimitry Andric  // applied in all following lines
fe6060f1SDimitry Andric  int FirstLineSpaceChange = 0;
0b57cec5SDimitry Andric  for (const FormatToken *CurrentTok = &Tok;
0b57cec5SDimitry Andric       CurrentTok && CurrentTok->is(TT_LineComment);
0b57cec5SDimitry Andric       CurrentTok = CurrentTok->Next) {
0b57cec5SDimitry Andric    LastLineTok = LineTok;
0b57cec5SDimitry Andric    StringRef TokenText(CurrentTok->TokenText);
5f757f3fSDimitry Andric    assert((TokenText.starts_with("//") || TokenText.starts_with("#")) &&
0b57cec5SDimitry Andric           "unsupported line comment prefix, '//' and '#' are supported");
0b57cec5SDimitry Andric    size_t FirstLineIndex = Lines.size();
0b57cec5SDimitry Andric    TokenText.split(Lines, "\n");
0b57cec5SDimitry Andric    Content.resize(Lines.size());
0b57cec5SDimitry Andric    ContentColumn.resize(Lines.size());
fe6060f1SDimitry Andric    PrefixSpaceChange.resize(Lines.size());
0b57cec5SDimitry Andric    Tokens.resize(Lines.size());
0b57cec5SDimitry Andric    Prefix.resize(Lines.size());
0b57cec5SDimitry Andric    OriginalPrefix.resize(Lines.size());
0b57cec5SDimitry Andric    for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
0b57cec5SDimitry Andric      Lines[i] = Lines[i].ltrim(Blanks);
e8d8bef9SDimitry Andric      StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style);
fe6060f1SDimitry Andric      OriginalPrefix[i] = IndentPrefix;
56f451bbSDimitry Andric      const int SpacesInPrefix = llvm::count(IndentPrefix, ' ');
fe6060f1SDimitry Andric
81ad6265SDimitry Andric      // This lambda also considers multibyte character that is not handled in
81ad6265SDimitry Andric      // functions like isPunctuation provided by CharInfo.
81ad6265SDimitry Andric      const auto NoSpaceBeforeFirstCommentChar = [&]() {
81ad6265SDimitry Andric        assert(Lines[i].size() > IndentPrefix.size());
81ad6265SDimitry Andric        const char FirstCommentChar = Lines[i][IndentPrefix.size()];
81ad6265SDimitry Andric        const unsigned FirstCharByteSize =
81ad6265SDimitry Andric            encoding::getCodePointNumBytes(FirstCommentChar, Encoding);
81ad6265SDimitry Andric        if (encoding::columnWidth(
81ad6265SDimitry Andric                Lines[i].substr(IndentPrefix.size(), FirstCharByteSize),
81ad6265SDimitry Andric                Encoding) != 1) {
81ad6265SDimitry Andric          return false;
81ad6265SDimitry Andric        }
81ad6265SDimitry Andric        // In C-like comments, add a space before #. For example this is useful
81ad6265SDimitry Andric        // to preserve the relative indentation when commenting out code with
81ad6265SDimitry Andric        // #includes.
81ad6265SDimitry Andric        //
81ad6265SDimitry Andric        // In languages using # as the comment leader such as proto, don't
81ad6265SDimitry Andric        // add a space to support patterns like:
81ad6265SDimitry Andric        // #########
81ad6265SDimitry Andric        // # section
81ad6265SDimitry Andric        // #########
5f757f3fSDimitry Andric        if (FirstCommentChar == '#' && !TokenText.starts_with("#"))
81ad6265SDimitry Andric          return false;
81ad6265SDimitry Andric        return FirstCommentChar == '\\' || isPunctuation(FirstCommentChar) ||
81ad6265SDimitry Andric               isHorizontalWhitespace(FirstCommentChar);
81ad6265SDimitry Andric      };
81ad6265SDimitry Andric
fe6060f1SDimitry Andric      // On the first line of the comment section we calculate how many spaces
fe6060f1SDimitry Andric      // are to be added or removed, all lines after that just get only the
fe6060f1SDimitry Andric      // change and we will not look at the maximum anymore. Additionally to the
fe6060f1SDimitry Andric      // actual first line, we calculate that when the non space Prefix changes,
fe6060f1SDimitry Andric      // e.g. from "///" to "//".
fe6060f1SDimitry Andric      if (i == 0 || OriginalPrefix[i].rtrim(Blanks) !=
fe6060f1SDimitry Andric                        OriginalPrefix[i - 1].rtrim(Blanks)) {
56f451bbSDimitry Andric        if (SpacesInPrefix < Minimum && Lines[i].size() > IndentPrefix.size() &&
81ad6265SDimitry Andric            !NoSpaceBeforeFirstCommentChar()) {
56f451bbSDimitry Andric          FirstLineSpaceChange = Minimum - SpacesInPrefix;
56f451bbSDimitry Andric        } else if (static_cast<unsigned>(SpacesInPrefix) >
56f451bbSDimitry Andric                   Style.SpacesInLineCommentPrefix.Maximum) {
fe6060f1SDimitry Andric          FirstLineSpaceChange =
fe6060f1SDimitry Andric              Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix;
fe6060f1SDimitry Andric        } else {
fe6060f1SDimitry Andric          FirstLineSpaceChange = 0;
fe6060f1SDimitry Andric        }
fe6060f1SDimitry Andric      }
fe6060f1SDimitry Andric
fe6060f1SDimitry Andric      if (Lines[i].size() != IndentPrefix.size()) {
fe6060f1SDimitry Andric        PrefixSpaceChange[i] = FirstLineSpaceChange;
fe6060f1SDimitry Andric
56f451bbSDimitry Andric        if (SpacesInPrefix + PrefixSpaceChange[i] < Minimum) {
56f451bbSDimitry Andric          PrefixSpaceChange[i] +=
56f451bbSDimitry Andric              Minimum - (SpacesInPrefix + PrefixSpaceChange[i]);
fe6060f1SDimitry Andric        }
fe6060f1SDimitry Andric
fe6060f1SDimitry Andric        assert(Lines[i].size() > IndentPrefix.size());
fe6060f1SDimitry Andric        const auto FirstNonSpace = Lines[i][IndentPrefix.size()];
81ad6265SDimitry Andric        const bool IsFormatComment = LineTok && switchesFormatting(*LineTok);
81ad6265SDimitry Andric        const bool LineRequiresLeadingSpace =
81ad6265SDimitry Andric            !NoSpaceBeforeFirstCommentChar() ||
81ad6265SDimitry Andric            (FirstNonSpace == '}' && FirstLineSpaceChange != 0);
81ad6265SDimitry Andric        const bool AllowsSpaceChange =
81ad6265SDimitry Andric            !IsFormatComment &&
81ad6265SDimitry Andric            (SpacesInPrefix != 0 || LineRequiresLeadingSpace);
fe6060f1SDimitry Andric
fe6060f1SDimitry Andric        if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) {
fe6060f1SDimitry Andric          Prefix[i] = IndentPrefix.str();
fe6060f1SDimitry Andric          Prefix[i].append(PrefixSpaceChange[i], ' ');
fe6060f1SDimitry Andric        } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) {
fe6060f1SDimitry Andric          Prefix[i] = IndentPrefix
fe6060f1SDimitry Andric                          .drop_back(std::min<std::size_t>(
fe6060f1SDimitry Andric                              -PrefixSpaceChange[i], SpacesInPrefix))
fe6060f1SDimitry Andric                          .str();
fe6060f1SDimitry Andric        } else {
fe6060f1SDimitry Andric          Prefix[i] = IndentPrefix.str();
fe6060f1SDimitry Andric        }
fe6060f1SDimitry Andric      } else {
fe6060f1SDimitry Andric        // If the IndentPrefix is the whole line, there is no content and we
fe6060f1SDimitry Andric        // just drop all the spaces.
fe6060f1SDimitry Andric        Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str();
0b57cec5SDimitry Andric      }
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric      Tokens[i] = LineTok;
0b57cec5SDimitry Andric      Content[i] = Lines[i].substr(IndentPrefix.size());
0b57cec5SDimitry Andric      ContentColumn[i] =
0b57cec5SDimitry Andric          StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn,
0b57cec5SDimitry Andric                                                      Style.TabWidth, Encoding);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric      // Calculate the end of the non-whitespace text in this line.
0b57cec5SDimitry Andric      size_t EndOfLine = Content[i].find_last_not_of(Blanks);
0b57cec5SDimitry Andric      if (EndOfLine == StringRef::npos)
0b57cec5SDimitry Andric        EndOfLine = Content[i].size();
0b57cec5SDimitry Andric      else
0b57cec5SDimitry Andric        ++EndOfLine;
0b57cec5SDimitry Andric      Content[i] = Content[i].substr(0, EndOfLine);
0b57cec5SDimitry Andric    }
0b57cec5SDimitry Andric    LineTok = CurrentTok->Next;
0b57cec5SDimitry Andric    if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
0b57cec5SDimitry Andric      // A line comment section needs to be broken by a line comment that is
0b57cec5SDimitry Andric      // preceded by at least two newlines. Note that we put this break here
0b57cec5SDimitry Andric      // instead of breaking at a previous stage during parsing, since that
0b57cec5SDimitry Andric      // would split the contents of the enum into two unwrapped lines in this
0b57cec5SDimitry Andric      // example, which is undesirable:
0b57cec5SDimitry Andric      // enum A {
0b57cec5SDimitry Andric      //   a, // comment about a
0b57cec5SDimitry Andric      //
0b57cec5SDimitry Andric      //   // comment about b
0b57cec5SDimitry Andric      //   b
0b57cec5SDimitry Andric      // };
0b57cec5SDimitry Andric      //
0b57cec5SDimitry Andric      // FIXME: Consider putting separate line comment sections as children to
0b57cec5SDimitry Andric      // the unwrapped line instead.
0b57cec5SDimitry Andric      break;
0b57cec5SDimitry Andric    }
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricunsigned
0b57cec5SDimitry AndricBreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
0b57cec5SDimitry Andric                                            StringRef::size_type Length,
0b57cec5SDimitry Andric                                            unsigned StartColumn) const {
0b57cec5SDimitry Andric  return encoding::columnWidthWithTabs(
0b57cec5SDimitry Andric      Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
0b57cec5SDimitry Andric      Encoding);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
fe6060f1SDimitry Andricunsigned
fe6060f1SDimitry AndricBreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
fe6060f1SDimitry Andric                                                   bool /*Break*/) const {
0b57cec5SDimitry Andric  return ContentColumn[LineIndex];
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricvoid BreakableLineCommentSection::insertBreak(
0b57cec5SDimitry Andric    unsigned LineIndex, unsigned TailOffset, Split Split,
0b57cec5SDimitry Andric    unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
0b57cec5SDimitry Andric  StringRef Text = Content[LineIndex].substr(TailOffset);
0b57cec5SDimitry Andric  // Compute the offset of the split relative to the beginning of the token
0b57cec5SDimitry Andric  // text.
0b57cec5SDimitry Andric  unsigned BreakOffsetInToken =
0b57cec5SDimitry Andric      Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
0b57cec5SDimitry Andric  unsigned CharsToRemove = Split.second;
0b57cec5SDimitry Andric  Whitespaces.replaceWhitespaceInToken(
0b57cec5SDimitry Andric      tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
0b57cec5SDimitry Andric      Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
fe6060f1SDimitry Andric      /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size());
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry AndricBreakableComment::Split BreakableLineCommentSection::getReflowSplit(
480093f4SDimitry Andric    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
0b57cec5SDimitry Andric  if (!mayReflow(LineIndex, CommentPragmasRegex))
0b57cec5SDimitry Andric    return Split(StringRef::npos, 0);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric  // In a line comment section each line is a separate token; thus, after a
0b57cec5SDimitry Andric  // split we replace all whitespace before the current line comment token
0b57cec5SDimitry Andric  // (which does not need to be included in the split), plus the start of the
0b57cec5SDimitry Andric  // line up to where the content starts.
0b57cec5SDimitry Andric  return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricvoid BreakableLineCommentSection::reflow(unsigned LineIndex,
0b57cec5SDimitry Andric                                         WhitespaceManager &Whitespaces) const {
0b57cec5SDimitry Andric  if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
0b57cec5SDimitry Andric    // Reflow happens between tokens. Replace the whitespace between the
0b57cec5SDimitry Andric    // tokens by the empty string.
0b57cec5SDimitry Andric    Whitespaces.replaceWhitespace(
0b57cec5SDimitry Andric        *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
5ffd83dbSDimitry Andric        /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true,
5ffd83dbSDimitry Andric        /*InPPDirective=*/false);
0b57cec5SDimitry Andric  } else if (LineIndex > 0) {
0b57cec5SDimitry Andric    // In case we're reflowing after the '\' in:
0b57cec5SDimitry Andric    //
0b57cec5SDimitry Andric    //   // line comment \
0b57cec5SDimitry Andric    //   // line 2
0b57cec5SDimitry Andric    //
0b57cec5SDimitry Andric    // the reflow happens inside the single comment token (it is a single line
0b57cec5SDimitry Andric    // comment with an unescaped newline).
0b57cec5SDimitry Andric    // Replace the whitespace between the '\' and '//' with the empty string.
0b57cec5SDimitry Andric    //
0b57cec5SDimitry Andric    // Offset points to after the '\' relative to start of the token.
0b57cec5SDimitry Andric    unsigned Offset = Lines[LineIndex - 1].data() +
0b57cec5SDimitry Andric                      Lines[LineIndex - 1].size() -
0b57cec5SDimitry Andric                      tokenAt(LineIndex - 1).TokenText.data();
0b57cec5SDimitry Andric    // WhitespaceLength is the number of chars between the '\' and the '//' on
0b57cec5SDimitry Andric    // the next line.
0b57cec5SDimitry Andric    unsigned WhitespaceLength =
0b57cec5SDimitry Andric        Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
0b57cec5SDimitry Andric    Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
0b57cec5SDimitry Andric                                         /*ReplaceChars=*/WhitespaceLength,
0b57cec5SDimitry Andric                                         /*PreviousPostfix=*/"",
0b57cec5SDimitry Andric                                         /*CurrentPrefix=*/"",
0b57cec5SDimitry Andric                                         /*InPPDirective=*/false,
0b57cec5SDimitry Andric                                         /*Newlines=*/0,
0b57cec5SDimitry Andric                                         /*Spaces=*/0);
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric  // Replace the indent and prefix of the token with the reflow prefix.
0b57cec5SDimitry Andric  unsigned Offset =
0b57cec5SDimitry Andric      Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
0b57cec5SDimitry Andric  unsigned WhitespaceLength =
0b57cec5SDimitry Andric      Content[LineIndex].data() - Lines[LineIndex].data();
0b57cec5SDimitry Andric  Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
0b57cec5SDimitry Andric                                       /*ReplaceChars=*/WhitespaceLength,
0b57cec5SDimitry Andric                                       /*PreviousPostfix=*/"",
0b57cec5SDimitry Andric                                       /*CurrentPrefix=*/ReflowPrefix,
0b57cec5SDimitry Andric                                       /*InPPDirective=*/false,
0b57cec5SDimitry Andric                                       /*Newlines=*/0,
0b57cec5SDimitry Andric                                       /*Spaces=*/0);
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricvoid BreakableLineCommentSection::adaptStartOfLine(
0b57cec5SDimitry Andric    unsigned LineIndex, WhitespaceManager &Whitespaces) const {
0b57cec5SDimitry Andric  // If this is the first line of a token, we need to inform Whitespace Manager
0b57cec5SDimitry Andric  // about it: either adapt the whitespace range preceding it, or mark it as an
0b57cec5SDimitry Andric  // untouchable token.
0b57cec5SDimitry Andric  // This happens for instance here:
0b57cec5SDimitry Andric  // // line 1 \
0b57cec5SDimitry Andric  // // line 2
0b57cec5SDimitry Andric  if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
0b57cec5SDimitry Andric    // This is the first line for the current token, but no reflow with the
0b57cec5SDimitry Andric    // previous token is necessary. However, we still may need to adjust the
0b57cec5SDimitry Andric    // start column. Note that ContentColumn[LineIndex] is the expected
0b57cec5SDimitry Andric    // content column after a possible update to the prefix, hence the prefix
0b57cec5SDimitry Andric    // length change is included.
0b57cec5SDimitry Andric    unsigned LineColumn =
0b57cec5SDimitry Andric        ContentColumn[LineIndex] -
0b57cec5SDimitry Andric        (Content[LineIndex].data() - Lines[LineIndex].data()) +
0b57cec5SDimitry Andric        (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric    // We always want to create a replacement instead of adding an untouchable
0b57cec5SDimitry Andric    // token, even if LineColumn is the same as the original column of the
0b57cec5SDimitry Andric    // token. This is because WhitespaceManager doesn't align trailing
0b57cec5SDimitry Andric    // comments if they are untouchable.
0b57cec5SDimitry Andric    Whitespaces.replaceWhitespace(*Tokens[LineIndex],
0b57cec5SDimitry Andric                                  /*Newlines=*/1,
0b57cec5SDimitry Andric                                  /*Spaces=*/LineColumn,
0b57cec5SDimitry Andric                                  /*StartOfTokenColumn=*/LineColumn,
5ffd83dbSDimitry Andric                                  /*IsAligned=*/true,
0b57cec5SDimitry Andric                                  /*InPPDirective=*/false);
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric  if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
0b57cec5SDimitry Andric    // Adjust the prefix if necessary.
fe6060f1SDimitry Andric    const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0);
fe6060f1SDimitry Andric    const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0);
0b57cec5SDimitry Andric    Whitespaces.replaceWhitespaceInToken(
fe6060f1SDimitry Andric        tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove,
fe6060f1SDimitry Andric        /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false,
fe6060f1SDimitry Andric        /*Newlines=*/0, /*Spaces=*/SpacesToAdd);
0b57cec5SDimitry Andric  }
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricvoid BreakableLineCommentSection::updateNextToken(LineState &State) const {
81ad6265SDimitry Andric  if (LastLineTok)
0b57cec5SDimitry Andric    State.NextToken = LastLineTok->Next;
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andricbool BreakableLineCommentSection::mayReflow(
480093f4SDimitry Andric    unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const {
0b57cec5SDimitry Andric  // Line comments have the indent as part of the prefix, so we need to
0b57cec5SDimitry Andric  // recompute the start of the line.
0b57cec5SDimitry Andric  StringRef IndentContent = Content[LineIndex];
5f757f3fSDimitry Andric  if (Lines[LineIndex].starts_with("//"))
0b57cec5SDimitry Andric    IndentContent = Lines[LineIndex].substr(2);
0b57cec5SDimitry Andric  // FIXME: Decide whether we want to reflow non-regular indents:
0b57cec5SDimitry Andric  // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
0b57cec5SDimitry Andric  // OriginalPrefix[LineIndex-1]. That means we don't reflow
0b57cec5SDimitry Andric  // // text that protrudes
0b57cec5SDimitry Andric  // //    into text with different indent
0b57cec5SDimitry Andric  // We do reflow in that case in block comments.
0b57cec5SDimitry Andric  return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
0b57cec5SDimitry Andric         mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
0b57cec5SDimitry Andric         !switchesFormatting(tokenAt(LineIndex)) &&
0b57cec5SDimitry Andric         OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
0b57cec5SDimitry Andric}
0b57cec5SDimitry Andric
0b57cec5SDimitry Andric} // namespace format
0b57cec5SDimitry Andric} // namespace clang