1e5dd7070Spatrick //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// 2e5dd7070Spatrick // 3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information. 5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e5dd7070Spatrick // 7e5dd7070Spatrick //===----------------------------------------------------------------------===// 8e5dd7070Spatrick /// 9e5dd7070Spatrick /// \file 10e5dd7070Spatrick /// WhitespaceManager class manages whitespace around tokens and their 11e5dd7070Spatrick /// replacements. 12e5dd7070Spatrick /// 13e5dd7070Spatrick //===----------------------------------------------------------------------===// 14e5dd7070Spatrick 15e5dd7070Spatrick #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 16e5dd7070Spatrick #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H 17e5dd7070Spatrick 18e5dd7070Spatrick #include "TokenAnnotator.h" 19e5dd7070Spatrick #include "clang/Basic/SourceManager.h" 20e5dd7070Spatrick #include "clang/Format/Format.h" 21a9ac8606Spatrick #include "llvm/ADT/SmallVector.h" 22a9ac8606Spatrick #include <algorithm> 23e5dd7070Spatrick #include <string> 24ec727ea7Spatrick #include <tuple> 25e5dd7070Spatrick 26e5dd7070Spatrick namespace clang { 27e5dd7070Spatrick namespace format { 28e5dd7070Spatrick 29e5dd7070Spatrick /// Manages the whitespaces around tokens and their replacements. 30e5dd7070Spatrick /// 31e5dd7070Spatrick /// This includes special handling for certain constructs, e.g. the alignment of 32e5dd7070Spatrick /// trailing line comments. 33e5dd7070Spatrick /// 34e5dd7070Spatrick /// To guarantee correctness of alignment operations, the \c WhitespaceManager 35e5dd7070Spatrick /// must be informed about every token in the source file; for each token, there 36e5dd7070Spatrick /// must be exactly one call to either \c replaceWhitespace or 37e5dd7070Spatrick /// \c addUntouchableToken. 38e5dd7070Spatrick /// 39e5dd7070Spatrick /// There may be multiple calls to \c breakToken for a given token. 40e5dd7070Spatrick class WhitespaceManager { 41e5dd7070Spatrick public: WhitespaceManager(const SourceManager & SourceMgr,const FormatStyle & Style,bool UseCRLF)42e5dd7070Spatrick WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, 43e5dd7070Spatrick bool UseCRLF) 44e5dd7070Spatrick : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} 45e5dd7070Spatrick useCRLF()46e5dd7070Spatrick bool useCRLF() const { return UseCRLF; } 47e5dd7070Spatrick 48*12c85518Srobert /// Infers whether the input is using CRLF. 49*12c85518Srobert static bool inputUsesCRLF(StringRef Text, bool DefaultToCRLF); 50*12c85518Srobert 51e5dd7070Spatrick /// Replaces the whitespace in front of \p Tok. Only call once for 52e5dd7070Spatrick /// each \c AnnotatedToken. 53e5dd7070Spatrick /// 54e5dd7070Spatrick /// \p StartOfTokenColumn is the column at which the token will start after 55e5dd7070Spatrick /// this replacement. It is needed for determining how \p Spaces is turned 56e5dd7070Spatrick /// into tabs and spaces for some format styles. 57e5dd7070Spatrick void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, 58ec727ea7Spatrick unsigned StartOfTokenColumn, bool isAligned = false, 59e5dd7070Spatrick bool InPPDirective = false); 60e5dd7070Spatrick 61e5dd7070Spatrick /// Adds information about an unchangeable token's whitespace. 62e5dd7070Spatrick /// 63e5dd7070Spatrick /// Needs to be called for every token for which \c replaceWhitespace 64e5dd7070Spatrick /// was not called. 65e5dd7070Spatrick void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); 66e5dd7070Spatrick 67e5dd7070Spatrick llvm::Error addReplacement(const tooling::Replacement &Replacement); 68e5dd7070Spatrick 69e5dd7070Spatrick /// Inserts or replaces whitespace in the middle of a token. 70e5dd7070Spatrick /// 71e5dd7070Spatrick /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix 72e5dd7070Spatrick /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars 73e5dd7070Spatrick /// characters. 74e5dd7070Spatrick /// 75e5dd7070Spatrick /// Note: \p Spaces can be negative to retain information about initial 76e5dd7070Spatrick /// relative column offset between a line of a block comment and the start of 77e5dd7070Spatrick /// the comment. This negative offset may be compensated by trailing comment 78e5dd7070Spatrick /// alignment here. In all other cases negative \p Spaces will be truncated to 79e5dd7070Spatrick /// 0. 80e5dd7070Spatrick /// 81e5dd7070Spatrick /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is 82e5dd7070Spatrick /// used to align backslashes correctly. 83e5dd7070Spatrick void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, 84e5dd7070Spatrick unsigned ReplaceChars, 85e5dd7070Spatrick StringRef PreviousPostfix, 86e5dd7070Spatrick StringRef CurrentPrefix, bool InPPDirective, 87e5dd7070Spatrick unsigned Newlines, int Spaces); 88e5dd7070Spatrick 89e5dd7070Spatrick /// Returns all the \c Replacements created during formatting. 90e5dd7070Spatrick const tooling::Replacements &generateReplacements(); 91e5dd7070Spatrick 92e5dd7070Spatrick /// Represents a change before a token, a break inside a token, 93e5dd7070Spatrick /// or the layout of an unchanged token (or whitespace within). 94e5dd7070Spatrick struct Change { 95e5dd7070Spatrick /// Functor to sort changes in original source order. 96e5dd7070Spatrick class IsBeforeInFile { 97e5dd7070Spatrick public: IsBeforeInFileChange98e5dd7070Spatrick IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} 99e5dd7070Spatrick bool operator()(const Change &C1, const Change &C2) const; 100e5dd7070Spatrick 101e5dd7070Spatrick private: 102e5dd7070Spatrick const SourceManager &SourceMgr; 103e5dd7070Spatrick }; 104e5dd7070Spatrick 105e5dd7070Spatrick /// Creates a \c Change. 106e5dd7070Spatrick /// 107e5dd7070Spatrick /// The generated \c Change will replace the characters at 108e5dd7070Spatrick /// \p OriginalWhitespaceRange with a concatenation of 109e5dd7070Spatrick /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces 110e5dd7070Spatrick /// and \p CurrentLinePrefix. 111e5dd7070Spatrick /// 112e5dd7070Spatrick /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out 113e5dd7070Spatrick /// trailing comments and escaped newlines. 114e5dd7070Spatrick Change(const FormatToken &Tok, bool CreateReplacement, 115e5dd7070Spatrick SourceRange OriginalWhitespaceRange, int Spaces, 116e5dd7070Spatrick unsigned StartOfTokenColumn, unsigned NewlinesBefore, 117e5dd7070Spatrick StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, 118ec727ea7Spatrick bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken); 119e5dd7070Spatrick 120e5dd7070Spatrick // The kind of the token whose whitespace this change replaces, or in which 121e5dd7070Spatrick // this change inserts whitespace. 122e5dd7070Spatrick // FIXME: Currently this is not set correctly for breaks inside comments, as 123e5dd7070Spatrick // the \c BreakableToken is still doing its own alignment. 124e5dd7070Spatrick const FormatToken *Tok; 125e5dd7070Spatrick 126e5dd7070Spatrick bool CreateReplacement; 127e5dd7070Spatrick // Changes might be in the middle of a token, so we cannot just keep the 128e5dd7070Spatrick // FormatToken around to query its information. 129e5dd7070Spatrick SourceRange OriginalWhitespaceRange; 130e5dd7070Spatrick unsigned StartOfTokenColumn; 131e5dd7070Spatrick unsigned NewlinesBefore; 132e5dd7070Spatrick std::string PreviousLinePostfix; 133e5dd7070Spatrick std::string CurrentLinePrefix; 134ec727ea7Spatrick bool IsAligned; 135e5dd7070Spatrick bool ContinuesPPDirective; 136e5dd7070Spatrick 137e5dd7070Spatrick // The number of spaces in front of the token or broken part of the token. 138e5dd7070Spatrick // This will be adapted when aligning tokens. 139e5dd7070Spatrick // Can be negative to retain information about the initial relative offset 140e5dd7070Spatrick // of the lines in a block comment. This is used when aligning trailing 141e5dd7070Spatrick // comments. Uncompensated negative offset is truncated to 0. 142e5dd7070Spatrick int Spaces; 143e5dd7070Spatrick 144e5dd7070Spatrick // If this change is inside of a token but not at the start of the token or 145e5dd7070Spatrick // directly after a newline. 146e5dd7070Spatrick bool IsInsideToken; 147e5dd7070Spatrick 148e5dd7070Spatrick // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and 149e5dd7070Spatrick // \c EscapedNewlineColumn will be calculated in 150e5dd7070Spatrick // \c calculateLineBreakInformation. 151e5dd7070Spatrick bool IsTrailingComment; 152e5dd7070Spatrick unsigned TokenLength; 153e5dd7070Spatrick unsigned PreviousEndOfTokenColumn; 154e5dd7070Spatrick unsigned EscapedNewlineColumn; 155e5dd7070Spatrick 156e5dd7070Spatrick // These fields are used to retain correct relative line indentation in a 157e5dd7070Spatrick // block comment when aligning trailing comments. 158e5dd7070Spatrick // 159e5dd7070Spatrick // If this Change represents a continuation of a block comment, 160e5dd7070Spatrick // \c StartOfBlockComment is pointer to the first Change in the block 161e5dd7070Spatrick // comment. \c IndentationOffset is a relative column offset to this 162e5dd7070Spatrick // change, so that the correct column can be reconstructed at the end of 163e5dd7070Spatrick // the alignment process. 164e5dd7070Spatrick const Change *StartOfBlockComment; 165e5dd7070Spatrick int IndentationOffset; 166e5dd7070Spatrick 167ec727ea7Spatrick // Depth of conditionals. Computed from tracking fake parenthesis, except 168ec727ea7Spatrick // it does not increase the indent for "chained" conditionals. 169ec727ea7Spatrick int ConditionalsLevel; 170ec727ea7Spatrick 171ec727ea7Spatrick // A combination of indent, nesting and conditionals levels, which are used 172ec727ea7Spatrick // in tandem to compute lexical scope, for the purposes of deciding 173e5dd7070Spatrick // when to stop consecutive alignment runs. indentAndNestingLevelChange174ec727ea7Spatrick std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const { 175ec727ea7Spatrick return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel, 176ec727ea7Spatrick ConditionalsLevel); 177e5dd7070Spatrick } 178e5dd7070Spatrick }; 179e5dd7070Spatrick 180e5dd7070Spatrick private: 181a9ac8606Spatrick struct CellDescription { 182a9ac8606Spatrick unsigned Index = 0; 183a9ac8606Spatrick unsigned Cell = 0; 184a9ac8606Spatrick unsigned EndIndex = 0; 185a9ac8606Spatrick bool HasSplit = false; 186a9ac8606Spatrick CellDescription *NextColumnElement = nullptr; 187a9ac8606Spatrick 188a9ac8606Spatrick constexpr bool operator==(const CellDescription &Other) const { 189a9ac8606Spatrick return Index == Other.Index && Cell == Other.Cell && 190a9ac8606Spatrick EndIndex == Other.EndIndex; 191a9ac8606Spatrick } 192a9ac8606Spatrick constexpr bool operator!=(const CellDescription &Other) const { 193a9ac8606Spatrick return !(*this == Other); 194a9ac8606Spatrick } 195a9ac8606Spatrick }; 196a9ac8606Spatrick 197a9ac8606Spatrick struct CellDescriptions { 198a9ac8606Spatrick SmallVector<CellDescription> Cells; 199*12c85518Srobert SmallVector<unsigned> CellCounts; 200a9ac8606Spatrick unsigned InitialSpaces = 0; 201*12c85518Srobert 202*12c85518Srobert // Determine if every row in the array 203*12c85518Srobert // has the same number of columns. isRectangularCellDescriptions204*12c85518Srobert bool isRectangular() const { 205*12c85518Srobert if (CellCounts.empty()) 206*12c85518Srobert return false; 207*12c85518Srobert 208*12c85518Srobert for (auto NumberOfColumns : CellCounts) 209*12c85518Srobert if (NumberOfColumns != CellCounts[0]) 210*12c85518Srobert return false; 211*12c85518Srobert return true; 212*12c85518Srobert } 213a9ac8606Spatrick }; 214a9ac8606Spatrick 215e5dd7070Spatrick /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens 216e5dd7070Spatrick /// or token parts in a line and \c PreviousEndOfTokenColumn and 217e5dd7070Spatrick /// \c EscapedNewlineColumn for the first tokens or token parts in a line. 218e5dd7070Spatrick void calculateLineBreakInformation(); 219e5dd7070Spatrick 220e5dd7070Spatrick /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes. 221e5dd7070Spatrick void alignConsecutiveMacros(); 222e5dd7070Spatrick 223e5dd7070Spatrick /// Align consecutive assignments over all \c Changes. 224e5dd7070Spatrick void alignConsecutiveAssignments(); 225e5dd7070Spatrick 226ec727ea7Spatrick /// Align consecutive bitfields over all \c Changes. 227ec727ea7Spatrick void alignConsecutiveBitFields(); 228ec727ea7Spatrick 229e5dd7070Spatrick /// Align consecutive declarations over all \c Changes. 230e5dd7070Spatrick void alignConsecutiveDeclarations(); 231e5dd7070Spatrick 232ec727ea7Spatrick /// Align consecutive declarations over all \c Changes. 233ec727ea7Spatrick void alignChainedConditionals(); 234ec727ea7Spatrick 235e5dd7070Spatrick /// Align trailing comments over all \c Changes. 236e5dd7070Spatrick void alignTrailingComments(); 237e5dd7070Spatrick 238e5dd7070Spatrick /// Align trailing comments from change \p Start to change \p End at 239e5dd7070Spatrick /// the specified \p Column. 240e5dd7070Spatrick void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); 241e5dd7070Spatrick 242e5dd7070Spatrick /// Align escaped newlines over all \c Changes. 243e5dd7070Spatrick void alignEscapedNewlines(); 244e5dd7070Spatrick 245e5dd7070Spatrick /// Align escaped newlines from change \p Start to change \p End at 246e5dd7070Spatrick /// the specified \p Column. 247e5dd7070Spatrick void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); 248e5dd7070Spatrick 249a9ac8606Spatrick /// Align Array Initializers over all \c Changes. 250a9ac8606Spatrick void alignArrayInitializers(); 251a9ac8606Spatrick 252a9ac8606Spatrick /// Align Array Initializers from change \p Start to change \p End at 253a9ac8606Spatrick /// the specified \p Column. 254a9ac8606Spatrick void alignArrayInitializers(unsigned Start, unsigned End); 255a9ac8606Spatrick 256a9ac8606Spatrick /// Align Array Initializers being careful to right justify the columns 257a9ac8606Spatrick /// as described by \p CellDescs. 258a9ac8606Spatrick void alignArrayInitializersRightJustified(CellDescriptions &&CellDescs); 259a9ac8606Spatrick 260*12c85518Srobert /// Align Array Initializers being careful to left justify the columns 261a9ac8606Spatrick /// as described by \p CellDescs. 262a9ac8606Spatrick void alignArrayInitializersLeftJustified(CellDescriptions &&CellDescs); 263a9ac8606Spatrick 264a9ac8606Spatrick /// Calculate the cell width between two indexes. 265a9ac8606Spatrick unsigned calculateCellWidth(unsigned Start, unsigned End, 266a9ac8606Spatrick bool WithSpaces = false) const; 267a9ac8606Spatrick 268a9ac8606Spatrick /// Get a set of fully specified CellDescriptions between \p Start and 269a9ac8606Spatrick /// \p End of the change list. 270a9ac8606Spatrick CellDescriptions getCells(unsigned Start, unsigned End); 271a9ac8606Spatrick 272a9ac8606Spatrick /// Does this \p Cell contain a split element? 273a9ac8606Spatrick static bool isSplitCell(const CellDescription &Cell); 274a9ac8606Spatrick 275*12c85518Srobert /// Get the width of the preceding cells from \p Start to \p End. 276a9ac8606Spatrick template <typename I> getNetWidth(const I & Start,const I & End,unsigned InitialSpaces)277a9ac8606Spatrick auto getNetWidth(const I &Start, const I &End, unsigned InitialSpaces) const { 278a9ac8606Spatrick auto NetWidth = InitialSpaces; 279a9ac8606Spatrick for (auto PrevIter = Start; PrevIter != End; ++PrevIter) { 280a9ac8606Spatrick // If we broke the line the initial spaces are already 281a9ac8606Spatrick // accounted for. 282a9ac8606Spatrick if (Changes[PrevIter->Index].NewlinesBefore > 0) 283a9ac8606Spatrick NetWidth = 0; 284a9ac8606Spatrick NetWidth += 285a9ac8606Spatrick calculateCellWidth(PrevIter->Index, PrevIter->EndIndex, true) + 1; 286a9ac8606Spatrick } 287a9ac8606Spatrick return NetWidth; 288a9ac8606Spatrick } 289a9ac8606Spatrick 290a9ac8606Spatrick /// Get the maximum width of a cell in a sequence of columns. 291a9ac8606Spatrick template <typename I> getMaximumCellWidth(I CellIter,unsigned NetWidth)292a9ac8606Spatrick unsigned getMaximumCellWidth(I CellIter, unsigned NetWidth) const { 293a9ac8606Spatrick unsigned CellWidth = 294a9ac8606Spatrick calculateCellWidth(CellIter->Index, CellIter->EndIndex, true); 295a9ac8606Spatrick if (Changes[CellIter->Index].NewlinesBefore == 0) 296a9ac8606Spatrick CellWidth += NetWidth; 297a9ac8606Spatrick for (const auto *Next = CellIter->NextColumnElement; Next != nullptr; 298a9ac8606Spatrick Next = Next->NextColumnElement) { 299a9ac8606Spatrick auto ThisWidth = calculateCellWidth(Next->Index, Next->EndIndex, true); 300a9ac8606Spatrick if (Changes[Next->Index].NewlinesBefore == 0) 301a9ac8606Spatrick ThisWidth += NetWidth; 302a9ac8606Spatrick CellWidth = std::max(CellWidth, ThisWidth); 303a9ac8606Spatrick } 304a9ac8606Spatrick return CellWidth; 305a9ac8606Spatrick } 306a9ac8606Spatrick 307a9ac8606Spatrick /// Get The maximum width of all columns to a given cell. 308a9ac8606Spatrick template <typename I> getMaximumNetWidth(const I & CellStart,const I & CellStop,unsigned InitialSpaces,unsigned CellCount,unsigned MaxRowCount)309a9ac8606Spatrick unsigned getMaximumNetWidth(const I &CellStart, const I &CellStop, 310*12c85518Srobert unsigned InitialSpaces, unsigned CellCount, 311*12c85518Srobert unsigned MaxRowCount) const { 312a9ac8606Spatrick auto MaxNetWidth = getNetWidth(CellStart, CellStop, InitialSpaces); 313a9ac8606Spatrick auto RowCount = 1U; 314a9ac8606Spatrick auto Offset = std::distance(CellStart, CellStop); 315a9ac8606Spatrick for (const auto *Next = CellStop->NextColumnElement; Next != nullptr; 316a9ac8606Spatrick Next = Next->NextColumnElement) { 317*12c85518Srobert if (RowCount > MaxRowCount) 318*12c85518Srobert break; 319a9ac8606Spatrick auto Start = (CellStart + RowCount * CellCount); 320a9ac8606Spatrick auto End = Start + Offset; 321a9ac8606Spatrick MaxNetWidth = 322a9ac8606Spatrick std::max(MaxNetWidth, getNetWidth(Start, End, InitialSpaces)); 323a9ac8606Spatrick ++RowCount; 324a9ac8606Spatrick } 325a9ac8606Spatrick return MaxNetWidth; 326a9ac8606Spatrick } 327a9ac8606Spatrick 328a9ac8606Spatrick /// Align a split cell with a newline to the first element in the cell. 329a9ac8606Spatrick void alignToStartOfCell(unsigned Start, unsigned End); 330a9ac8606Spatrick 331a9ac8606Spatrick /// Link the Cell pointers in the list of Cells. 332a9ac8606Spatrick static CellDescriptions linkCells(CellDescriptions &&CellDesc); 333a9ac8606Spatrick 334e5dd7070Spatrick /// Fill \c Replaces with the replacements for all effective changes. 335e5dd7070Spatrick void generateChanges(); 336e5dd7070Spatrick 337e5dd7070Spatrick /// Stores \p Text as the replacement for the whitespace in \p Range. 338e5dd7070Spatrick void storeReplacement(SourceRange Range, StringRef Text); 339e5dd7070Spatrick void appendNewlineText(std::string &Text, unsigned Newlines); 340e5dd7070Spatrick void appendEscapedNewlineText(std::string &Text, unsigned Newlines, 341e5dd7070Spatrick unsigned PreviousEndOfTokenColumn, 342e5dd7070Spatrick unsigned EscapedNewlineColumn); 343e5dd7070Spatrick void appendIndentText(std::string &Text, unsigned IndentLevel, 344ec727ea7Spatrick unsigned Spaces, unsigned WhitespaceStartColumn, 345ec727ea7Spatrick bool IsAligned); 346ec727ea7Spatrick unsigned appendTabIndent(std::string &Text, unsigned Spaces, 347ec727ea7Spatrick unsigned Indentation); 348e5dd7070Spatrick 349e5dd7070Spatrick SmallVector<Change, 16> Changes; 350e5dd7070Spatrick const SourceManager &SourceMgr; 351e5dd7070Spatrick tooling::Replacements Replaces; 352e5dd7070Spatrick const FormatStyle &Style; 353e5dd7070Spatrick bool UseCRLF; 354e5dd7070Spatrick }; 355e5dd7070Spatrick 356e5dd7070Spatrick } // namespace format 357e5dd7070Spatrick } // namespace clang 358e5dd7070Spatrick 359e5dd7070Spatrick #endif 360