xref: /llvm-project/clang/lib/Format/NamespaceEndCommentsFixer.cpp (revision b92d6dd704d789240685a336ad8b25a9f381b4cc)
1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11 /// fixes namespace end comments.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "NamespaceEndCommentsFixer.h"
16 
17 #define DEBUG_TYPE "namespace-end-comments-fixer"
18 
19 namespace clang {
20 namespace format {
21 
22 namespace {
23 // Iterates all tokens starting from StartTok to EndTok and apply Fn to all
24 // tokens between them including StartTok and EndTok. Returns the token after
25 // EndTok.
26 const FormatToken *
27 processTokens(const FormatToken *Tok, tok::TokenKind StartTok,
28               tok::TokenKind EndTok,
29               llvm::function_ref<void(const FormatToken *)> Fn) {
30   if (!Tok || Tok->isNot(StartTok))
31     return Tok;
32   int NestLevel = 0;
33   do {
34     if (Tok->is(StartTok))
35       ++NestLevel;
36     else if (Tok->is(EndTok))
37       --NestLevel;
38     if (Fn)
39       Fn(Tok);
40     Tok = Tok->getNextNonComment();
41   } while (Tok && NestLevel > 0);
42   return Tok;
43 }
44 
45 const FormatToken *skipAttribute(const FormatToken *Tok) {
46   if (!Tok)
47     return nullptr;
48   if (Tok->isAttribute()) {
49     Tok = Tok->getNextNonComment();
50     Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr);
51   } else if (Tok->is(tok::l_square)) {
52     Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr);
53   }
54   return Tok;
55 }
56 
57 // Computes the name of a namespace given the namespace token.
58 // Returns "" for anonymous namespace.
59 std::string computeName(const FormatToken *NamespaceTok) {
60   assert(NamespaceTok &&
61          NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
62          "expecting a namespace token");
63   std::string name;
64   const FormatToken *Tok = NamespaceTok->getNextNonComment();
65   if (NamespaceTok->is(TT_NamespaceMacro)) {
66     // Collects all the non-comment tokens between opening parenthesis
67     // and closing parenthesis or comma.
68     assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
69     Tok = Tok->getNextNonComment();
70     while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
71       name += Tok->TokenText;
72       Tok = Tok->getNextNonComment();
73     }
74     return name;
75   }
76   Tok = skipAttribute(Tok);
77 
78   std::string FirstNSName;
79   // For `namespace [[foo]] A::B::inline C {` or
80   // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
81   // Peek for the first '::' (or '{' or '(')) and then return all tokens from
82   // one token before that up until the '{'. A '(' might be a macro with
83   // arguments.
84   const FormatToken *FirstNSTok = nullptr;
85   while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
86     if (FirstNSTok)
87       FirstNSName += FirstNSTok->TokenText;
88     FirstNSTok = Tok;
89     Tok = Tok->getNextNonComment();
90   }
91 
92   if (FirstNSTok)
93     Tok = FirstNSTok;
94   Tok = skipAttribute(Tok);
95 
96   FirstNSTok = nullptr;
97   // Add everything from '(' to ')'.
98   auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; };
99   bool IsPrevColoncolon = false;
100   bool HasColoncolon = false;
101   bool IsPrevInline = false;
102   bool NameFinished = false;
103   // If we found '::' in name, then it's the name. Otherwise, we can't tell
104   // which one is name. For example, `namespace A B {`.
105   while (Tok && Tok->isNot(tok::l_brace)) {
106     if (FirstNSTok) {
107       if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) {
108         if (FirstNSTok->is(tok::l_paren)) {
109           FirstNSTok = Tok =
110               processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken);
111           continue;
112         }
113         if (FirstNSTok->isNot(tok::coloncolon)) {
114           NameFinished = true;
115           break;
116         }
117       }
118       name += FirstNSTok->TokenText;
119       IsPrevColoncolon = FirstNSTok->is(tok::coloncolon);
120       HasColoncolon = HasColoncolon || IsPrevColoncolon;
121       if (FirstNSTok->is(tok::kw_inline)) {
122         name += " ";
123         IsPrevInline = true;
124       }
125     }
126     FirstNSTok = Tok;
127     Tok = Tok->getNextNonComment();
128     const FormatToken *TokAfterAttr = skipAttribute(Tok);
129     if (TokAfterAttr != Tok)
130       FirstNSTok = Tok = TokAfterAttr;
131   }
132   if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace))
133     name += FirstNSTok->TokenText;
134   if (FirstNSName.empty() || HasColoncolon)
135     return name;
136   return name.empty() ? FirstNSName : FirstNSName + " " + name;
137 }
138 
139 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
140                                   const FormatToken *NamespaceTok,
141                                   unsigned SpacesToAdd) {
142   std::string text = "//";
143   text.append(SpacesToAdd, ' ');
144   text += NamespaceTok->TokenText;
145   if (NamespaceTok->is(TT_NamespaceMacro))
146     text += "(";
147   else if (!NamespaceName.empty())
148     text += ' ';
149   text += NamespaceName;
150   if (NamespaceTok->is(TT_NamespaceMacro))
151     text += ")";
152   if (AddNewline)
153     text += '\n';
154   return text;
155 }
156 
157 bool hasEndComment(const FormatToken *RBraceTok) {
158   return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
159 }
160 
161 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
162                      const FormatToken *NamespaceTok) {
163   assert(hasEndComment(RBraceTok));
164   const FormatToken *Comment = RBraceTok->Next;
165 
166   // Matches a valid namespace end comment.
167   // Valid namespace end comments don't need to be edited.
168   static const llvm::Regex NamespaceCommentPattern =
169       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
170                   "namespace( +([a-zA-Z0-9:_ ]+))?\\.? *(\\*/)?$",
171                   llvm::Regex::IgnoreCase);
172   static const llvm::Regex NamespaceMacroCommentPattern =
173       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
174                   "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*|\".+\")\\)\\.? *(\\*/)?$",
175                   llvm::Regex::IgnoreCase);
176 
177   SmallVector<StringRef, 8> Groups;
178   if (NamespaceTok->is(TT_NamespaceMacro) &&
179       NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
180     StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
181     // The name of the macro must be used.
182     if (NamespaceTokenText != NamespaceTok->TokenText)
183       return false;
184   } else if (NamespaceTok->isNot(tok::kw_namespace) ||
185              !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
186     // Comment does not match regex.
187     return false;
188   }
189   StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5].rtrim() : "";
190   // Anonymous namespace comments must not mention a namespace name.
191   if (NamespaceName.empty() && !NamespaceNameInComment.empty())
192     return false;
193   StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
194   // Named namespace comments must not mention anonymous namespace.
195   if (!NamespaceName.empty() && !AnonymousInComment.empty())
196     return false;
197   if (NamespaceNameInComment == NamespaceName)
198     return true;
199 
200   // Has namespace comment flowed onto the next line.
201   // } // namespace
202   //   // verylongnamespacenamethatdidnotfitonthepreviouscommentline
203   if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
204     return false;
205 
206   static const llvm::Regex CommentPattern = llvm::Regex(
207       "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
208 
209   // Pull out just the comment text.
210   if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
211     return false;
212   NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
213 
214   return NamespaceNameInComment == NamespaceName;
215 }
216 
217 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
218                    const SourceManager &SourceMgr,
219                    tooling::Replacements *Fixes) {
220   auto EndLoc = RBraceTok->Tok.getEndLoc();
221   auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
222   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
223   if (Err) {
224     llvm::errs() << "Error while adding namespace end comment: "
225                  << llvm::toString(std::move(Err)) << "\n";
226   }
227 }
228 
229 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
230                       const SourceManager &SourceMgr,
231                       tooling::Replacements *Fixes) {
232   assert(hasEndComment(RBraceTok));
233   const FormatToken *Comment = RBraceTok->Next;
234   auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
235                                              Comment->Tok.getEndLoc());
236   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
237   if (Err) {
238     llvm::errs() << "Error while updating namespace end comment: "
239                  << llvm::toString(std::move(Err)) << "\n";
240   }
241 }
242 } // namespace
243 
244 const FormatToken *
245 getNamespaceToken(const AnnotatedLine *Line,
246                   const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
247   if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
248     return nullptr;
249   size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
250   if (StartLineIndex == UnwrappedLine::kInvalidIndex)
251     return nullptr;
252   assert(StartLineIndex < AnnotatedLines.size());
253   const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
254   if (NamespaceTok->is(tok::l_brace)) {
255     // "namespace" keyword can be on the line preceding '{', e.g. in styles
256     // where BraceWrapping.AfterNamespace is true.
257     if (StartLineIndex > 0) {
258       NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
259       if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
260         return nullptr;
261     }
262   }
263 
264   return NamespaceTok->getNamespaceToken();
265 }
266 
267 StringRef
268 getNamespaceTokenText(const AnnotatedLine *Line,
269                       const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
270   const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
271   return NamespaceTok ? NamespaceTok->TokenText : StringRef();
272 }
273 
274 NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
275                                                      const FormatStyle &Style)
276     : TokenAnalyzer(Env, Style) {}
277 
278 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
279     TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
280     FormatTokenLexer &Tokens) {
281   const SourceManager &SourceMgr = Env.getSourceManager();
282   AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
283   tooling::Replacements Fixes;
284 
285   // Spin through the lines and ensure we have balanced braces.
286   int Braces = 0;
287   for (AnnotatedLine *Line : AnnotatedLines) {
288     FormatToken *Tok = Line->First;
289     while (Tok) {
290       Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
291       Tok = Tok->Next;
292     }
293   }
294   // Don't attempt to comment unbalanced braces or this can
295   // lead to comments being placed on the closing brace which isn't
296   // the matching brace of the namespace. (occurs during incomplete editing).
297   if (Braces != 0)
298     return {Fixes, 0};
299 
300   std::string AllNamespaceNames;
301   size_t StartLineIndex = SIZE_MAX;
302   StringRef NamespaceTokenText;
303   unsigned int CompactedNamespacesCount = 0;
304   for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
305     const AnnotatedLine *EndLine = AnnotatedLines[I];
306     const FormatToken *NamespaceTok =
307         getNamespaceToken(EndLine, AnnotatedLines);
308     if (!NamespaceTok)
309       continue;
310     FormatToken *RBraceTok = EndLine->First;
311     if (RBraceTok->Finalized)
312       continue;
313     RBraceTok->Finalized = true;
314     const FormatToken *EndCommentPrevTok = RBraceTok;
315     // Namespaces often end with '};'. In that case, attach namespace end
316     // comments to the semicolon tokens.
317     if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
318       EndCommentPrevTok = RBraceTok->Next;
319     if (StartLineIndex == SIZE_MAX)
320       StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
321     std::string NamespaceName = computeName(NamespaceTok);
322     if (Style.CompactNamespaces) {
323       if (CompactedNamespacesCount == 0)
324         NamespaceTokenText = NamespaceTok->TokenText;
325       if ((I + 1 < E) &&
326           NamespaceTokenText ==
327               getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
328           StartLineIndex - CompactedNamespacesCount - 1 ==
329               AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
330           !AnnotatedLines[I + 1]->First->Finalized) {
331         if (hasEndComment(EndCommentPrevTok)) {
332           // remove end comment, it will be merged in next one
333           updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
334         }
335         ++CompactedNamespacesCount;
336         if (!NamespaceName.empty())
337           AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
338         continue;
339       }
340       NamespaceName += AllNamespaceNames;
341       CompactedNamespacesCount = 0;
342       AllNamespaceNames = std::string();
343     }
344     // The next token in the token stream after the place where the end comment
345     // token must be. This is either the next token on the current line or the
346     // first token on the next line.
347     const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
348     if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
349       EndCommentNextTok = EndCommentNextTok->Next;
350     if (!EndCommentNextTok && I + 1 < E)
351       EndCommentNextTok = AnnotatedLines[I + 1]->First;
352     bool AddNewline = EndCommentNextTok &&
353                       EndCommentNextTok->NewlinesBefore == 0 &&
354                       EndCommentNextTok->isNot(tok::eof);
355     const std::string EndCommentText =
356         computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
357                               Style.SpacesInLineCommentPrefix.Minimum);
358     if (!hasEndComment(EndCommentPrevTok)) {
359       unsigned LineCount = 0;
360       for (auto J = StartLineIndex + 1; J < I; ++J)
361         LineCount += AnnotatedLines[J]->size();
362       if (LineCount > Style.ShortNamespaceLines) {
363         addEndComment(EndCommentPrevTok,
364                       std::string(Style.SpacesBeforeTrailingComments, ' ') +
365                           EndCommentText,
366                       SourceMgr, &Fixes);
367       }
368     } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
369                                 NamespaceTok)) {
370       updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
371     }
372     StartLineIndex = SIZE_MAX;
373   }
374   return {Fixes, 0};
375 }
376 
377 } // namespace format
378 } // namespace clang
379