xref: /llvm-project/clang-tools-extra/clangd/Format.cpp (revision aec7670b5d6354b63b011c9ed0632b70983b0328)
1 //===--- Format.cpp -----------------------------------------*- C++-*------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "Format.h"
9 #include "support/Logger.h"
10 #include "clang/Basic/SourceManager.h"
11 #include "clang/Format/Format.h"
12 #include "clang/Lex/Lexer.h"
13 #include "clang/Tooling/Core/Replacement.h"
14 #include "llvm/Support/Unicode.h"
15 
16 namespace clang {
17 namespace clangd {
18 namespace {
19 
20 /// Append closing brackets )]} to \p Code to make it well-formed.
21 /// Clang-format conservatively refuses to format files with unmatched brackets
22 /// as it isn't sure where the errors are and so can't correct.
23 /// When editing, it's reasonable to assume code before the cursor is complete.
closeBrackets(std::string & Code,const format::FormatStyle & Style)24 void closeBrackets(std::string &Code, const format::FormatStyle &Style) {
25   SourceManagerForFile FileSM("mock_file.cpp", Code);
26   auto &SM = FileSM.get();
27   FileID FID = SM.getMainFileID();
28   LangOptions LangOpts = format::getFormattingLangOpts(Style);
29   Lexer Lex(FID, SM.getBufferOrFake(FID), SM, LangOpts);
30   Token Tok;
31   std::vector<char> Brackets;
32   while (!Lex.LexFromRawLexer(Tok)) {
33     switch(Tok.getKind()) {
34       case tok::l_paren:
35         Brackets.push_back(')');
36         break;
37       case tok::l_brace:
38         Brackets.push_back('}');
39         break;
40       case tok::l_square:
41         Brackets.push_back(']');
42         break;
43       case tok::r_paren:
44         if (!Brackets.empty() && Brackets.back() == ')')
45           Brackets.pop_back();
46         break;
47       case tok::r_brace:
48         if (!Brackets.empty() && Brackets.back() == '}')
49           Brackets.pop_back();
50         break;
51       case tok::r_square:
52         if (!Brackets.empty() && Brackets.back() == ']')
53           Brackets.pop_back();
54         break;
55       default:
56         continue;
57     }
58   }
59   // Attempt to end any open comments first.
60   Code.append("\n// */\n");
61   Code.append(Brackets.rbegin(), Brackets.rend());
62 }
63 
commentMarker(llvm::StringRef Line)64 static StringRef commentMarker(llvm::StringRef Line) {
65   for (StringRef Marker : {"///", "//"}){
66     auto I = Line.rfind(Marker);
67     if (I != StringRef::npos)
68       return Line.substr(I, Marker.size());
69   }
70   return "";
71 }
72 
firstLine(llvm::StringRef Code)73 llvm::StringRef firstLine(llvm::StringRef Code) {
74   return Code.take_until([](char C) { return C == '\n'; });
75 }
76 
lastLine(llvm::StringRef Code)77 llvm::StringRef lastLine(llvm::StringRef Code) {
78   llvm::StringRef Rest = Code;
79   while (!Rest.empty() && Rest.back() != '\n')
80     Rest = Rest.drop_back();
81   return Code.substr(Rest.size());
82 }
83 
84 // Filename is needed for tooling::Replacement and some overloads of reformat().
85 // Its value should not affect the outcome. We use the default from reformat().
86 llvm::StringRef Filename = "<stdin>";
87 
88 // tooling::Replacement from overlapping StringRefs: From must be part of Code.
replacement(llvm::StringRef Code,llvm::StringRef From,llvm::StringRef To)89 tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From,
90                                  llvm::StringRef To) {
91   assert(From.begin() >= Code.begin() && From.end() <= Code.end());
92   // The filename is required but ignored.
93   return tooling::Replacement(Filename, From.data() - Code.data(),
94                               From.size(), To);
95 }
96 
97 // High-level representation of incremental formatting changes.
98 // The changes are made in two steps.
99 // 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding
100 //    comment markers when splitting a line comment with a newline).
101 // 2) a selective clang-format run:
102 //    - the "source code" passed to clang format is the code up to the cursor,
103 //      a placeholder for the cursor, and some closing brackets
104 //    - the formatting is restricted to the cursor and (possibly) other ranges
105 //      (e.g. the old line when inserting a newline).
106 //    - changes before the cursor are applied, those after are discarded.
107 struct IncrementalChanges {
108   // Changes that should be applied before running clang-format.
109   tooling::Replacements Changes;
110   // Ranges of the original source code that should be clang-formatted.
111   // The CursorProxyText will also be formatted.
112   std::vector<tooling::Range> FormatRanges;
113   // The source code that should stand in for the cursor when clang-formatting.
114   // e.g. after inserting a newline, a line-comment at the cursor is used to
115   // ensure that the newline is preserved.
116   std::string CursorPlaceholder;
117 };
118 
119 // The two functions below, columnWidth() and columnWidthWithTabs(), were
120 // adapted from similar functions in clang/lib/Format/Encoding.h.
121 // FIXME: Move those functions to clang/include/clang/Format.h and reuse them?
122 
123 // Helper function for columnWidthWithTabs().
columnWidth(StringRef Text)124 inline unsigned columnWidth(StringRef Text) {
125   int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text);
126   if (ContentWidth < 0)
127     return Text.size(); // fallback for unprintable characters
128   return ContentWidth;
129 }
130 
131 // Returns the number of columns required to display the \p Text on a terminal
132 // with the \p TabWidth.
columnWidthWithTabs(StringRef Text,unsigned TabWidth)133 inline unsigned columnWidthWithTabs(StringRef Text, unsigned TabWidth) {
134   unsigned TotalWidth = 0;
135   StringRef Tail = Text;
136   for (;;) {
137     StringRef::size_type TabPos = Tail.find('\t');
138     if (TabPos == StringRef::npos)
139       return TotalWidth + columnWidth(Tail);
140     TotalWidth += columnWidth(Tail.substr(0, TabPos));
141     if (TabWidth)
142       TotalWidth += TabWidth - TotalWidth % TabWidth;
143     Tail = Tail.substr(TabPos + 1);
144   }
145 }
146 
147 // After a newline:
148 //  - we continue any line-comment that was split
149 //  - we format the old line in addition to the cursor
150 //  - we represent the cursor with a line comment to preserve the newline
getIncrementalChangesAfterNewline(llvm::StringRef Code,unsigned Cursor,unsigned TabWidth)151 IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code,
152                                                      unsigned Cursor,
153                                                      unsigned TabWidth) {
154   IncrementalChanges Result;
155   // Before newline, code looked like:
156   //    leading^trailing
157   // After newline, code looks like:
158   //    leading
159   //    indentation^trailing
160   // Where indentation was added by the editor.
161   StringRef Trailing = firstLine(Code.substr(Cursor));
162   StringRef Indentation = lastLine(Code.take_front(Cursor));
163   if (Indentation.data() == Code.data()) {
164     vlog("Typed a newline, but we're still on the first line!");
165     return Result;
166   }
167   StringRef Leading =
168       lastLine(Code.take_front(Indentation.data() - Code.data() - 1));
169   StringRef NextLine = firstLine(Code.substr(Cursor + Trailing.size() + 1));
170 
171   // Strip leading whitespace on trailing line.
172   StringRef TrailingTrim = Trailing.ltrim();
173   if (unsigned TrailWS = Trailing.size() - TrailingTrim.size())
174     cantFail(Result.Changes.add(
175         replacement(Code, StringRef(Trailing.begin(), TrailWS), "")));
176 
177   // If we split a comment, replace indentation with a comment marker.
178   // If the editor made the new line a comment, also respect that.
179   StringRef CommentMarker = commentMarker(Leading);
180   bool NewLineIsComment = !commentMarker(Indentation).empty();
181   if (!CommentMarker.empty() &&
182       (NewLineIsComment || !commentMarker(NextLine).empty() ||
183        (!TrailingTrim.empty() && !TrailingTrim.starts_with("//")))) {
184     // We indent the new comment to match the previous one.
185     StringRef PreComment =
186         Leading.take_front(CommentMarker.data() - Leading.data());
187     std::string IndentAndComment =
188         (std::string(columnWidthWithTabs(PreComment, TabWidth), ' ') +
189          CommentMarker + " ")
190             .str();
191     cantFail(
192         Result.Changes.add(replacement(Code, Indentation, IndentAndComment)));
193   } else {
194     // Remove any indentation and let clang-format re-add it.
195     // This prevents the cursor marker dragging e.g. an aligned comment with it.
196     cantFail(Result.Changes.add(replacement(Code, Indentation, "")));
197   }
198 
199   // If we put a the newline inside a {} pair, put } on its own line...
200   if (CommentMarker.empty() && Leading.ends_with("{") &&
201       Trailing.starts_with("}")) {
202     cantFail(
203         Result.Changes.add(replacement(Code, Trailing.take_front(1), "\n}")));
204     // ...and format it.
205     Result.FormatRanges.push_back(
206         tooling::Range(Trailing.data() - Code.data() + 1, 1));
207   }
208 
209   // Format the whole leading line.
210   Result.FormatRanges.push_back(
211       tooling::Range(Leading.data() - Code.data(), Leading.size()));
212 
213   // We use a comment to represent the cursor, to preserve the newline.
214   // A trailing identifier improves parsing of e.g. for without braces.
215   // Exception: if the previous line has a trailing comment, we can't use one
216   // as the cursor (they will be aligned). But in this case we don't need to.
217   Result.CursorPlaceholder = !CommentMarker.empty() ? "ident" : "//==\nident";
218 
219   return Result;
220 }
221 
getIncrementalChanges(llvm::StringRef Code,unsigned Cursor,llvm::StringRef InsertedText,unsigned TabWidth)222 IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor,
223                                          llvm::StringRef InsertedText,
224                                          unsigned TabWidth) {
225   IncrementalChanges Result;
226   if (InsertedText == "\n")
227     return getIncrementalChangesAfterNewline(Code, Cursor, TabWidth);
228 
229   Result.CursorPlaceholder = " /**/";
230   return Result;
231 }
232 
233 // Returns equivalent replacements that preserve the correspondence between
234 // OldCursor and NewCursor. If OldCursor lies in a replaced region, that
235 // replacement will be split.
236 std::vector<tooling::Replacement>
split(const tooling::Replacements & Replacements,unsigned OldCursor,unsigned NewCursor)237 split(const tooling::Replacements &Replacements, unsigned OldCursor,
238       unsigned NewCursor) {
239   std::vector<tooling::Replacement> Result;
240   int LengthChange = 0;
241   for (const tooling::Replacement &R : Replacements) {
242     if (R.getOffset() + R.getLength() <= OldCursor) { // before cursor
243       Result.push_back(R);
244       LengthChange += R.getReplacementText().size() - R.getLength();
245     } else if (R.getOffset() < OldCursor) { // overlaps cursor
246       int ReplacementSplit = NewCursor - LengthChange - R.getOffset();
247       assert(ReplacementSplit >= 0 &&
248              ReplacementSplit <= int(R.getReplacementText().size()) &&
249              "NewCursor incompatible with OldCursor!");
250       Result.push_back(tooling::Replacement(
251           R.getFilePath(), R.getOffset(), OldCursor - R.getOffset(),
252           R.getReplacementText().take_front(ReplacementSplit)));
253       Result.push_back(tooling::Replacement(
254           R.getFilePath(), OldCursor,
255           R.getLength() - (OldCursor - R.getOffset()),
256           R.getReplacementText().drop_front(ReplacementSplit)));
257     } else if (R.getOffset() >= OldCursor) { // after cursor
258       Result.push_back(R);
259     }
260   }
261   return Result;
262 }
263 
264 } // namespace
265 
266 // We're simulating the following sequence of changes:
267 //   - apply the pre-formatting edits (see getIncrementalChanges)
268 //   - insert a placeholder for the cursor
269 //   - format some of the resulting code
270 //   - remove the cursor placeholder again
271 // The replacements we return are produced by composing these.
272 //
273 // The text we actually pass to clang-format is slightly different from this,
274 // e.g. we have to close brackets. We ensure these differences are *after*
275 // all the regions we want to format, and discard changes in them.
276 std::vector<tooling::Replacement>
formatIncremental(llvm::StringRef OriginalCode,unsigned OriginalCursor,llvm::StringRef InsertedText,format::FormatStyle Style)277 formatIncremental(llvm::StringRef OriginalCode, unsigned OriginalCursor,
278                   llvm::StringRef InsertedText, format::FormatStyle Style) {
279   IncrementalChanges Incremental = getIncrementalChanges(
280       OriginalCode, OriginalCursor, InsertedText, Style.TabWidth);
281   // Never *remove* lines in response to pressing enter! This annoys users.
282   if (InsertedText == "\n") {
283     Style.MaxEmptyLinesToKeep = 1000;
284     Style.KeepEmptyLines.AtStartOfBlock = true;
285   }
286 
287   // Compute the code we want to format:
288   // 1) Start with code after the pre-formatting edits.
289   std::string CodeToFormat = cantFail(
290       tooling::applyAllReplacements(OriginalCode, Incremental.Changes));
291   unsigned Cursor = Incremental.Changes.getShiftedCodePosition(OriginalCursor);
292   // 2) Truncate code after the last interesting range.
293   unsigned FormatLimit = Cursor;
294   for (tooling::Range &R : Incremental.FormatRanges)
295     FormatLimit = std::max(FormatLimit, R.getOffset() + R.getLength());
296   CodeToFormat.resize(FormatLimit);
297   // 3) Insert a placeholder for the cursor.
298   CodeToFormat.insert(Cursor, Incremental.CursorPlaceholder);
299   // 4) Append brackets after FormatLimit so the code is well-formed.
300   closeBrackets(CodeToFormat, Style);
301 
302   // Determine the ranges to format:
303   std::vector<tooling::Range> RangesToFormat = Incremental.FormatRanges;
304   // Ranges after the cursor need to be adjusted for the placeholder.
305   for (auto &R : RangesToFormat) {
306     if (R.getOffset() > Cursor)
307       R = tooling::Range(R.getOffset() + Incremental.CursorPlaceholder.size(),
308                          R.getLength());
309   }
310   // We also format the cursor.
311   RangesToFormat.push_back(
312       tooling::Range(Cursor, Incremental.CursorPlaceholder.size()));
313   // Also update FormatLimit for the placeholder, we'll use this later.
314   FormatLimit += Incremental.CursorPlaceholder.size();
315 
316   // Run clang-format, and truncate changes at FormatLimit.
317   tooling::Replacements FormattingChanges;
318   format::FormattingAttemptStatus Status;
319   for (const tooling::Replacement &R : format::reformat(
320            Style, CodeToFormat, RangesToFormat, Filename, &Status)) {
321     if (R.getOffset() + R.getLength() <= FormatLimit) // Before limit.
322       cantFail(FormattingChanges.add(R));
323     else if(R.getOffset() < FormatLimit) { // Overlaps limit.
324       if (R.getReplacementText().empty()) // Deletions are easy to handle.
325         cantFail(FormattingChanges.add(tooling::Replacement(Filename,
326             R.getOffset(), FormatLimit - R.getOffset(), "")));
327       else
328         // Hopefully won't happen in practice?
329         elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}",
330              Cursor, CodeToFormat);
331     }
332   }
333   if (!Status.FormatComplete)
334     vlog("Incremental format incomplete at line {0}", Status.Line);
335 
336   // Now we are ready to compose the changes relative to OriginalCode.
337   //   edits -> insert placeholder -> format -> remove placeholder.
338   // We must express insert/remove as Replacements.
339   tooling::Replacements InsertCursorPlaceholder(
340       tooling::Replacement(Filename, Cursor, 0, Incremental.CursorPlaceholder));
341   unsigned FormattedCursorStart =
342                FormattingChanges.getShiftedCodePosition(Cursor),
343            FormattedCursorEnd = FormattingChanges.getShiftedCodePosition(
344                Cursor + Incremental.CursorPlaceholder.size());
345   tooling::Replacements RemoveCursorPlaceholder(
346       tooling::Replacement(Filename, FormattedCursorStart,
347                            FormattedCursorEnd - FormattedCursorStart, ""));
348 
349   // We can't simply merge() and return: tooling::Replacements will combine
350   // adjacent edits left and right of the cursor. This gives the right source
351   // code, but loses information about where the cursor is!
352   // Fortunately, none of the individual passes lose information, so:
353   //  - we use merge() to compute the final Replacements
354   //  - we chain getShiftedCodePosition() to compute final cursor position
355   //  - we split the final Replacements at the cursor position, so that
356   //    each Replacement lies either before or after the cursor.
357   tooling::Replacements Final;
358   unsigned FinalCursor = OriginalCursor;
359 #ifndef NDEBUG
360   std::string FinalCode = std::string(OriginalCode);
361   dlog("Initial code: {0}", FinalCode);
362 #endif
363   for (auto Pass :
364        std::vector<std::pair<const char *, const tooling::Replacements *>>{
365            {"Pre-formatting changes", &Incremental.Changes},
366            {"Insert placeholder", &InsertCursorPlaceholder},
367            {"clang-format", &FormattingChanges},
368            {"Remove placeholder", &RemoveCursorPlaceholder}}) {
369     Final = Final.merge(*Pass.second);
370     FinalCursor = Pass.second->getShiftedCodePosition(FinalCursor);
371 #ifndef NDEBUG
372     FinalCode =
373         cantFail(tooling::applyAllReplacements(FinalCode, *Pass.second));
374     dlog("After {0}:\n{1}^{2}", Pass.first,
375          StringRef(FinalCode).take_front(FinalCursor),
376          StringRef(FinalCode).drop_front(FinalCursor));
377 #endif
378   }
379   return split(Final, OriginalCursor, FinalCursor);
380 }
381 
382 unsigned
transformCursorPosition(unsigned Offset,const std::vector<tooling::Replacement> & Replacements)383 transformCursorPosition(unsigned Offset,
384                         const std::vector<tooling::Replacement> &Replacements) {
385   unsigned OriginalOffset = Offset;
386   for (const auto &R : Replacements) {
387     if (R.getOffset() + R.getLength() <= OriginalOffset) {
388       // Replacement is before cursor.
389       Offset += R.getReplacementText().size();
390       Offset -= R.getLength();
391     } else if (R.getOffset() < OriginalOffset) {
392       // Replacement overlaps cursor.
393       // Preserve position within replacement text, as far as possible.
394       unsigned PositionWithinReplacement = Offset - R.getOffset();
395       if (PositionWithinReplacement > R.getReplacementText().size()) {
396         Offset += R.getReplacementText().size();
397         Offset -= PositionWithinReplacement;
398       }
399     } else {
400       // Replacement after cursor.
401       break; // Replacements are sorted, the rest are also after the cursor.
402     }
403   }
404   return Offset;
405 }
406 
407 } // namespace clangd
408 } // namespace clang
409