1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that 11 /// fixes namespace end comments. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "NamespaceEndCommentsFixer.h" 16 17 #define DEBUG_TYPE "namespace-end-comments-fixer" 18 19 namespace clang { 20 namespace format { 21 22 namespace { 23 // Iterates all tokens starting from StartTok to EndTok and apply Fn to all 24 // tokens between them including StartTok and EndTok. Returns the token after 25 // EndTok. 26 const FormatToken * 27 processTokens(const FormatToken *Tok, tok::TokenKind StartTok, 28 tok::TokenKind EndTok, 29 llvm::function_ref<void(const FormatToken *)> Fn) { 30 if (!Tok || Tok->isNot(StartTok)) 31 return Tok; 32 int NestLevel = 0; 33 do { 34 if (Tok->is(StartTok)) 35 ++NestLevel; 36 else if (Tok->is(EndTok)) 37 --NestLevel; 38 if (Fn) 39 Fn(Tok); 40 Tok = Tok->getNextNonComment(); 41 } while (Tok && NestLevel > 0); 42 return Tok; 43 } 44 45 const FormatToken *skipAttribute(const FormatToken *Tok) { 46 if (!Tok) 47 return nullptr; 48 if (Tok->isAttribute()) { 49 Tok = Tok->getNextNonComment(); 50 Tok = processTokens(Tok, tok::l_paren, tok::r_paren, nullptr); 51 } else if (Tok->is(tok::l_square)) { 52 Tok = processTokens(Tok, tok::l_square, tok::r_square, nullptr); 53 } 54 return Tok; 55 } 56 57 // Computes the name of a namespace given the namespace token. 58 // Returns "" for anonymous namespace. 59 std::string computeName(const FormatToken *NamespaceTok) { 60 assert(NamespaceTok && 61 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 62 "expecting a namespace token"); 63 std::string name; 64 const FormatToken *Tok = NamespaceTok->getNextNonComment(); 65 if (NamespaceTok->is(TT_NamespaceMacro)) { 66 // Collects all the non-comment tokens between opening parenthesis 67 // and closing parenthesis or comma. 68 assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis"); 69 Tok = Tok->getNextNonComment(); 70 while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) { 71 name += Tok->TokenText; 72 Tok = Tok->getNextNonComment(); 73 } 74 return name; 75 } 76 Tok = skipAttribute(Tok); 77 78 std::string FirstNSName; 79 // For `namespace [[foo]] A::B::inline C {` or 80 // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C". 81 // Peek for the first '::' (or '{' or '(')) and then return all tokens from 82 // one token before that up until the '{'. A '(' might be a macro with 83 // arguments. 84 const FormatToken *FirstNSTok = nullptr; 85 while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) { 86 if (FirstNSTok) 87 FirstNSName += FirstNSTok->TokenText; 88 FirstNSTok = Tok; 89 Tok = Tok->getNextNonComment(); 90 } 91 92 if (FirstNSTok) 93 Tok = FirstNSTok; 94 Tok = skipAttribute(Tok); 95 96 FirstNSTok = nullptr; 97 // Add everything from '(' to ')'. 98 auto AddToken = [&name](const FormatToken *Tok) { name += Tok->TokenText; }; 99 bool IsPrevColoncolon = false; 100 bool HasColoncolon = false; 101 bool IsPrevInline = false; 102 bool NameFinished = false; 103 // If we found '::' in name, then it's the name. Otherwise, we can't tell 104 // which one is name. For example, `namespace A B {`. 105 while (Tok && Tok->isNot(tok::l_brace)) { 106 if (FirstNSTok) { 107 if (!IsPrevInline && HasColoncolon && !IsPrevColoncolon) { 108 if (FirstNSTok->is(tok::l_paren)) { 109 FirstNSTok = Tok = 110 processTokens(FirstNSTok, tok::l_paren, tok::r_paren, AddToken); 111 continue; 112 } 113 if (FirstNSTok->isNot(tok::coloncolon)) { 114 NameFinished = true; 115 break; 116 } 117 } 118 name += FirstNSTok->TokenText; 119 IsPrevColoncolon = FirstNSTok->is(tok::coloncolon); 120 HasColoncolon = HasColoncolon || IsPrevColoncolon; 121 if (FirstNSTok->is(tok::kw_inline)) { 122 name += " "; 123 IsPrevInline = true; 124 } 125 } 126 FirstNSTok = Tok; 127 Tok = Tok->getNextNonComment(); 128 const FormatToken *TokAfterAttr = skipAttribute(Tok); 129 if (TokAfterAttr != Tok) 130 FirstNSTok = Tok = TokAfterAttr; 131 } 132 if (!NameFinished && FirstNSTok && FirstNSTok->isNot(tok::l_brace)) 133 name += FirstNSTok->TokenText; 134 if (FirstNSName.empty() || HasColoncolon) 135 return name; 136 return name.empty() ? FirstNSName : FirstNSName + " " + name; 137 } 138 139 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline, 140 const FormatToken *NamespaceTok, 141 unsigned SpacesToAdd) { 142 std::string text = "//"; 143 text.append(SpacesToAdd, ' '); 144 text += NamespaceTok->TokenText; 145 if (NamespaceTok->is(TT_NamespaceMacro)) 146 text += "("; 147 else if (!NamespaceName.empty()) 148 text += ' '; 149 text += NamespaceName; 150 if (NamespaceTok->is(TT_NamespaceMacro)) 151 text += ")"; 152 if (AddNewline) 153 text += '\n'; 154 return text; 155 } 156 157 bool hasEndComment(const FormatToken *RBraceTok) { 158 return RBraceTok->Next && RBraceTok->Next->is(tok::comment); 159 } 160 161 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName, 162 const FormatToken *NamespaceTok) { 163 assert(hasEndComment(RBraceTok)); 164 const FormatToken *Comment = RBraceTok->Next; 165 166 // Matches a valid namespace end comment. 167 // Valid namespace end comments don't need to be edited. 168 static const llvm::Regex NamespaceCommentPattern = 169 llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" 170 "namespace( +([a-zA-Z0-9:_ ]+))?\\.? *(\\*/)?$", 171 llvm::Regex::IgnoreCase); 172 static const llvm::Regex NamespaceMacroCommentPattern = 173 llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" 174 "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*|\".+\")\\)\\.? *(\\*/)?$", 175 llvm::Regex::IgnoreCase); 176 177 SmallVector<StringRef, 8> Groups; 178 if (NamespaceTok->is(TT_NamespaceMacro) && 179 NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) { 180 StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : ""; 181 // The name of the macro must be used. 182 if (NamespaceTokenText != NamespaceTok->TokenText) 183 return false; 184 } else if (NamespaceTok->isNot(tok::kw_namespace) || 185 !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) { 186 // Comment does not match regex. 187 return false; 188 } 189 StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5].rtrim() : ""; 190 // Anonymous namespace comments must not mention a namespace name. 191 if (NamespaceName.empty() && !NamespaceNameInComment.empty()) 192 return false; 193 StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : ""; 194 // Named namespace comments must not mention anonymous namespace. 195 if (!NamespaceName.empty() && !AnonymousInComment.empty()) 196 return false; 197 if (NamespaceNameInComment == NamespaceName) 198 return true; 199 200 // Has namespace comment flowed onto the next line. 201 // } // namespace 202 // // verylongnamespacenamethatdidnotfitonthepreviouscommentline 203 if (!(Comment->Next && Comment->Next->is(TT_LineComment))) 204 return false; 205 206 static const llvm::Regex CommentPattern = llvm::Regex( 207 "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase); 208 209 // Pull out just the comment text. 210 if (!CommentPattern.match(Comment->Next->TokenText, &Groups)) 211 return false; 212 NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : ""; 213 214 return NamespaceNameInComment == NamespaceName; 215 } 216 217 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, 218 const SourceManager &SourceMgr, 219 tooling::Replacements *Fixes) { 220 auto EndLoc = RBraceTok->Tok.getEndLoc(); 221 auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc); 222 auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); 223 if (Err) { 224 llvm::errs() << "Error while adding namespace end comment: " 225 << llvm::toString(std::move(Err)) << "\n"; 226 } 227 } 228 229 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, 230 const SourceManager &SourceMgr, 231 tooling::Replacements *Fixes) { 232 assert(hasEndComment(RBraceTok)); 233 const FormatToken *Comment = RBraceTok->Next; 234 auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(), 235 Comment->Tok.getEndLoc()); 236 auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); 237 if (Err) { 238 llvm::errs() << "Error while updating namespace end comment: " 239 << llvm::toString(std::move(Err)) << "\n"; 240 } 241 } 242 } // namespace 243 244 const FormatToken * 245 getNamespaceToken(const AnnotatedLine *Line, 246 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 247 if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace)) 248 return nullptr; 249 size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; 250 if (StartLineIndex == UnwrappedLine::kInvalidIndex) 251 return nullptr; 252 assert(StartLineIndex < AnnotatedLines.size()); 253 const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First; 254 if (NamespaceTok->is(tok::l_brace)) { 255 // "namespace" keyword can be on the line preceding '{', e.g. in styles 256 // where BraceWrapping.AfterNamespace is true. 257 if (StartLineIndex > 0) { 258 NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First; 259 if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi)) 260 return nullptr; 261 } 262 } 263 264 return NamespaceTok->getNamespaceToken(); 265 } 266 267 StringRef 268 getNamespaceTokenText(const AnnotatedLine *Line, 269 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 270 const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines); 271 return NamespaceTok ? NamespaceTok->TokenText : StringRef(); 272 } 273 274 NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, 275 const FormatStyle &Style) 276 : TokenAnalyzer(Env, Style) {} 277 278 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( 279 TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 280 FormatTokenLexer &Tokens) { 281 const SourceManager &SourceMgr = Env.getSourceManager(); 282 AffectedRangeMgr.computeAffectedLines(AnnotatedLines); 283 tooling::Replacements Fixes; 284 285 // Spin through the lines and ensure we have balanced braces. 286 int Braces = 0; 287 for (AnnotatedLine *Line : AnnotatedLines) { 288 FormatToken *Tok = Line->First; 289 while (Tok) { 290 Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0; 291 Tok = Tok->Next; 292 } 293 } 294 // Don't attempt to comment unbalanced braces or this can 295 // lead to comments being placed on the closing brace which isn't 296 // the matching brace of the namespace. (occurs during incomplete editing). 297 if (Braces != 0) 298 return {Fixes, 0}; 299 300 std::string AllNamespaceNames; 301 size_t StartLineIndex = SIZE_MAX; 302 StringRef NamespaceTokenText; 303 unsigned int CompactedNamespacesCount = 0; 304 for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { 305 const AnnotatedLine *EndLine = AnnotatedLines[I]; 306 const FormatToken *NamespaceTok = 307 getNamespaceToken(EndLine, AnnotatedLines); 308 if (!NamespaceTok) 309 continue; 310 FormatToken *RBraceTok = EndLine->First; 311 if (RBraceTok->Finalized) 312 continue; 313 RBraceTok->Finalized = true; 314 const FormatToken *EndCommentPrevTok = RBraceTok; 315 // Namespaces often end with '};'. In that case, attach namespace end 316 // comments to the semicolon tokens. 317 if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) 318 EndCommentPrevTok = RBraceTok->Next; 319 if (StartLineIndex == SIZE_MAX) 320 StartLineIndex = EndLine->MatchingOpeningBlockLineIndex; 321 std::string NamespaceName = computeName(NamespaceTok); 322 if (Style.CompactNamespaces) { 323 if (CompactedNamespacesCount == 0) 324 NamespaceTokenText = NamespaceTok->TokenText; 325 if ((I + 1 < E) && 326 NamespaceTokenText == 327 getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) && 328 StartLineIndex - CompactedNamespacesCount - 1 == 329 AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex && 330 !AnnotatedLines[I + 1]->First->Finalized) { 331 if (hasEndComment(EndCommentPrevTok)) { 332 // remove end comment, it will be merged in next one 333 updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes); 334 } 335 ++CompactedNamespacesCount; 336 if (!NamespaceName.empty()) 337 AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames; 338 continue; 339 } 340 NamespaceName += AllNamespaceNames; 341 CompactedNamespacesCount = 0; 342 AllNamespaceNames = std::string(); 343 } 344 // The next token in the token stream after the place where the end comment 345 // token must be. This is either the next token on the current line or the 346 // first token on the next line. 347 const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next; 348 if (EndCommentNextTok && EndCommentNextTok->is(tok::comment)) 349 EndCommentNextTok = EndCommentNextTok->Next; 350 if (!EndCommentNextTok && I + 1 < E) 351 EndCommentNextTok = AnnotatedLines[I + 1]->First; 352 bool AddNewline = EndCommentNextTok && 353 EndCommentNextTok->NewlinesBefore == 0 && 354 EndCommentNextTok->isNot(tok::eof); 355 const std::string EndCommentText = 356 computeEndCommentText(NamespaceName, AddNewline, NamespaceTok, 357 Style.SpacesInLineCommentPrefix.Minimum); 358 if (!hasEndComment(EndCommentPrevTok)) { 359 unsigned LineCount = 0; 360 for (auto J = StartLineIndex + 1; J < I; ++J) 361 LineCount += AnnotatedLines[J]->size(); 362 if (LineCount > Style.ShortNamespaceLines) { 363 addEndComment(EndCommentPrevTok, 364 std::string(Style.SpacesBeforeTrailingComments, ' ') + 365 EndCommentText, 366 SourceMgr, &Fixes); 367 } 368 } else if (!validEndComment(EndCommentPrevTok, NamespaceName, 369 NamespaceTok)) { 370 updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); 371 } 372 StartLineIndex = SIZE_MAX; 373 } 374 return {Fixes, 0}; 375 } 376 377 } // namespace format 378 } // namespace clang 379